diff options
Diffstat (limited to 'sc/source/filter/inc/htmlpars.hxx')
-rw-r--r-- | sc/source/filter/inc/htmlpars.hxx | 630 |
1 files changed, 630 insertions, 0 deletions
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx new file mode 100644 index 000000000..47ecc57b4 --- /dev/null +++ b/sc/source/filter/inc/htmlpars.hxx @@ -0,0 +1,630 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#pragma once + +#include <memory> +#include <map> +#include <optional> +#include <stack> +#include <string_view> +#include <unordered_map> +#include <vector> +#include <o3tl/sorted_vector.hxx> + +#include <rangelst.hxx> +#include "eeparser.hxx" + +const sal_uInt32 SC_HTML_FONTSIZES = 7; // like export, HTML options + +// Pixel tolerance for SeekOffset and related. +const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table +const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested + +// BASE class for HTML parser classes + +class ScHTMLTable; + +/** + * Collection of HTML style data parsed from the content of <style> + * elements.
+ */ +class ScHTMLStyles +{ + typedef std::unordered_map<OUString, OUString> PropsType; + typedef ::std::map<OUString, PropsType> NamePropsType; + typedef ::std::map<OUString, NamePropsType> ElemsType; + + NamePropsType m_GlobalProps; /// global properties (for a given class for all elements) + NamePropsType m_ElemGlobalProps; /// element global properties (no class specified) + ElemsType m_ElemProps; /// element to class to properties (both element and class are given) + const OUString maEmpty; /// just a persistent empty string. +public: + ScHTMLStyles(); + + void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName, + const OUString& aProp, const OUString& aValue); + + /** + * Find best-matching property value for given element and class names. + */ + const OUString& getPropertyValue( + const OUString& rElem, const OUString& rClass, const OUString& rPropName) const; + +private: + static void insertProp( + NamePropsType& rProps, const OUString& aName, + const OUString& aProp, const OUString& aValue); +}; + +/** Base class for HTML parser classes. */ +class ScHTMLParser : public ScEEParser +{ + ScHTMLStyles maStyles; /// Parsed style data, exposed through GetStyles(). +protected: + sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ]; /// One entry per HTML font size (SC_HTML_FONTSIZES entries). + ScDocument* mpDoc; /// The destination document. + +public: + explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc ); + virtual ~ScHTMLParser() override; + + virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0; + + ScHTMLStyles& GetStyles() { return maStyles;} + ScDocument& GetDoc() { return *mpDoc;} + + /** Returns the "global table" which contains the entire HTML document.
*/ + virtual const ScHTMLTable* GetGlobalTable() const = 0; +}; + +typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset; + +struct ScHTMLTableStackEntry /* element type of ScHTMLLayoutParser::aTableStack */ +{ + ScRangeListRef xLockedList; + std::shared_ptr<ScEEParseEntry> xCellEntry; + ScHTMLColOffset* pLocalColOffset; + sal_uLong nFirstTableCell; + SCROW nRowCnt; + SCCOL nColCntStart; + SCCOL nMaxCol; + sal_uInt16 nTable; + sal_uInt16 nTableWidth; + sal_uInt16 nColOffset; + sal_uInt16 nColOffsetStart; + bool bFirstRow; + ScHTMLTableStackEntry( const std::shared_ptr<ScEEParseEntry>& rE, + const ScRangeListRef& rL, ScHTMLColOffset* pTO, + sal_uLong nFTC, + SCROW nRow, + SCCOL nStart, SCCOL nMax, sal_uInt16 nTab, + sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS, + bool bFR ) + : xLockedList( rL ), xCellEntry(rE), + pLocalColOffset( pTO ), + nFirstTableCell( nFTC ), + nRowCnt( nRow ), + nColCntStart( nStart ), nMaxCol( nMax ), + nTable( nTab ), nTableWidth( nTW ), + nColOffset( nCO ), nColOffsetStart( nCOS ), + bFirstRow( bFR ) + {} +}; + +struct ScHTMLAdjustStackEntry +{ + SCCOL nLastCol; + SCROW nNextRow; + SCROW nCurRow; + ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow, + SCROW nCRow ) + : nLastCol( nLCol ), nNextRow( nNRow ), + nCurRow( nCRow ) + {} +}; + +class EditEngine; +class ScDocument; +class HTMLOption; + +// TODO these need better names +typedef ::std::map<SCROW, SCROW> InnerMap; +typedef ::std::map<sal_uInt16, InnerMap*> OuterMap; // NOTE(review): holds raw InnerMap pointers - ownership is not visible in this header + +class ScHTMLLayoutParser : public ScHTMLParser +{ +private: + Size aPageSize; + OUString aBaseURL; + ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> > + aTableStack; + OUString aString; + ScRangeListRef xLockedList; // per table + std::unique_ptr<OuterMap> pTables; + ScHTMLColOffset maColOffset; + ScHTMLColOffset* pLocalColOffset; // per table + sal_uLong nFirstTableCell; // per table + short nTableLevel; + sal_uInt16 nTable; + sal_uInt16 nMaxTable; + SCCOL nColCntStart; // first Col per table + SCCOL nMaxCol; // per table + sal_uInt16 nTableWidth; // per table +
sal_uInt16 nColOffset; // current offset, in pixels + sal_uInt16 nColOffsetStart; // start value per table, in pixels + sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related + bool bFirstRow; // per table, whether in first row + bool bTabInTabCell:1; + bool bInCell:1; + bool bInTitle:1; + + DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void ); + void NewActEntry( const ScEEParseEntry* ); + static void EntryEnd( ScEEParseEntry*, const ESelection& ); + void ProcToken( HtmlImportInfo* ); + void CloseEntry( const HtmlImportInfo* ); + void NextRow( const HtmlImportInfo* ); + void SkipLocked( ScEEParseEntry*, bool bJoin = true ); + static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset, + SCCOL* pCol, sal_uInt16 nOffsetTol ); + static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset, + sal_uInt16& nWidth, sal_uInt16 nOffsetTol, + sal_uInt16 nWidthTol ); + static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset, + sal_uInt16 nWidth, sal_uInt16 nOffsetTol, + sal_uInt16 nWidthTol ); + static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset, + sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol ); + void Colonize( ScEEParseEntry* ); + sal_uInt16 GetWidth( const ScEEParseEntry* ); + void SetWidths(); + void Adjust(); + + sal_uInt16 GetWidthPixel( const HTMLOption& ); + bool IsAtBeginningOfText( const HtmlImportInfo* ); + + void TableOn( HtmlImportInfo* ); + void ColOn( HtmlImportInfo* ); + void TableRowOn( const HtmlImportInfo* ); + void TableRowOff( const HtmlImportInfo* ); + void TableDataOn( HtmlImportInfo* ); + void TableDataOff( const HtmlImportInfo* ); + void TableOff( const HtmlImportInfo* ); + void Image( HtmlImportInfo* ); + void AnchorOn( HtmlImportInfo* ); + void FontOn( HtmlImportInfo* ); + +public: + ScHTMLLayoutParser( EditEngine*, const OUString& rBaseURL, const Size& aPageSize, ScDocument* ); + virtual ~ScHTMLLayoutParser() override; + virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override; + virtual const ScHTMLTable*
GetGlobalTable() const override; +}; + +// HTML DATA QUERY PARSER + +/** Declares the orientation in or for a table: column or row. */ +enum ScHTMLOrient { tdCol = 0 , tdRow = 1 }; + +/** Type for a unique identifier for each table. */ +typedef sal_uInt16 ScHTMLTableId; +/** Identifier of the "global table" (the entire HTML document). */ +const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0; +/** Used as table index for normal (non-table) entries in ScHTMLEntry structs. Shares the value 0 with SC_HTML_GLOBAL_TABLE. */ +const ScHTMLTableId SC_HTML_NO_TABLE = 0; + +/** A 2D cell position in an HTML table. */ +struct ScHTMLPos +{ + SCCOL mnCol; + SCROW mnRow; + + explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {} + explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) : + mnCol( nCol ), mnRow( nRow ) {} + explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); } + + SCCOLROW Get( ScHTMLOrient eOrient ) const + { return (eOrient == tdCol) ? mnCol : mnRow; } + void Set( SCCOL nCol, SCROW nRow ) + { mnCol = nCol; mnRow = nRow; } + void Set( const ScAddress& rAddr ) + { Set( rAddr.Col(), rAddr.Row() ); } + ScAddress MakeAddr() const + { return ScAddress( mnCol, mnRow, 0 ); } +}; + +inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // orders by row first, then by column +{ + return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol)); +} + +/** A 2D cell size in an HTML table. */ +struct ScHTMLSize +{ + SCCOL mnCols; + SCROW mnRows; + + explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) : + mnCols( nCols ), mnRows( nRows ) {} + void Set( SCCOL nCols, SCROW nRows ) + { mnCols = nCols; mnRows = nRows; } +}; + +/** A single entry containing a line of text or representing a table. */ +struct ScHTMLEntry : public ScEEParseEntry +{ +public: + explicit ScHTMLEntry( + const SfxItemSet& rItemSet, + ScHTMLTableId nTableId = SC_HTML_NO_TABLE ); + + /** Returns true, if the selection of the entry is empty. */ + bool IsEmpty() const { return !aSel.HasRange(); } + /** Returns true, if the entry has any content to be imported.
*/ + bool HasContents() const; + /** Returns true, if the entry represents a table. */ + bool IsTable() const { return nTab != SC_HTML_NO_TABLE; } + /** Returns the table identifier stored in this entry (nTab). */ + ScHTMLTableId GetTableId() const { return nTab; } + + /** Sets the import always state; it cannot be cleared through this function. */ + void SetImportAlways() { mbImportAlways = true; } + /** Sets start point of the entry selection to the start of the import info object. */ + void AdjustStart( const HtmlImportInfo& rInfo ); + /** Sets end point of the entry selection to the end of the import info object. */ + void AdjustEnd( const HtmlImportInfo& rInfo ); + /** Deletes leading and trailing empty paragraphs from the entry. */ + void Strip( const EditEngine& rEditEngine ); + + /** Returns read/write access to the item set of this entry. */ + SfxItemSet& GetItemSet() { return aItemSet; } + /** Returns read-only access to the item set of this entry. */ + const SfxItemSet& GetItemSet() const { return aItemSet; } + +private: + bool mbImportAlways; /// true = Always import this entry. +}; + +/** This struct handles creation of unique table identifiers. */ +struct ScHTMLTableAutoId +{ + const ScHTMLTableId mnTableId; /// The created unique table identifier. + ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable. + + /** The constructor assigns an unused identifier to member mnTableId. */ + explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId ); +}; + +class ScHTMLTableMap; + +/** Stores data for one table in an HTML document. + + This class does the main work for importing an HTML document. It manages + the correct insertion of parse entries into the correct cells and the + creation of nested tables. Recalculation of resulting document size and + position is done recursively in all nested tables. + */ +class ScHTMLTable +{ +public: + /** Creates a new HTML table without content. + @descr Internally handles a current cell position.
This position is + invalid until first calls of RowOn() and DataOn(). + @param rParentTable Reference to the parent table that owns this table. + @param bPreFormText true = Table is based on preformatted text (<pre> tag). */ + explicit ScHTMLTable( + ScHTMLTable& rParentTable, + const HtmlImportInfo& rInfo, + bool bPreFormText, + const ScDocument& rDoc ); + + virtual ~ScHTMLTable(); + + /** Returns the name of the table, specified in the <table> tag. */ + const OUString& GetTableName() const { return maTableName; } + /** Returns the caption of the table, specified in the <caption> tag. */ + const OUString& GetTableCaption() const { return maCaption; } + /** Returns the unique identifier of the table. */ + ScHTMLTableId GetTableId() const { return maTableId.mnTableId; } + /** Returns the cell spanning of the specified cell. */ + ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const; + + /** Searches in all nested tables for the specified table. + @param nTableId Unique identifier of the table. */ + ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const; + + /** Puts the item into the item set of the current entry. */ + void PutItem( const SfxPoolItem& rItem ); + /** Inserts a text portion into current entry. */ + void PutText( const HtmlImportInfo& rInfo ); + /** Inserts a new line, if in preformatted text, else does nothing. */ + void InsertPara( const HtmlImportInfo& rInfo ); + + /** Inserts a line break (<br> tag). + @descr Inserts the current entry regardless of whether it is empty. */ + void BreakOn(); + /** Inserts a heading line (<p> and <h*> tags). */ + void HeadingOn(); + /** Processes a hyperlink (<a> tag). */ + void AnchorOn(); + + /** Starts a *new* table nested in this table (<table> tag). + @return Pointer to the new table. */ + ScHTMLTable* TableOn( const HtmlImportInfo& rInfo ); + /** Closes *this* table (</table> tag). + @return Pointer to the parent table.
*/ + ScHTMLTable* TableOff( const HtmlImportInfo& rInfo ); + /** Opens the caption of the table (<caption> tag). */ + void CaptionOn(); + /** Closes the caption of the table (</caption> tag). */ + void CaptionOff(); + /** Starts a *new* table based on preformatted text (<pre> tag). + @return Pointer to the new table. */ + ScHTMLTable* PreOn( const HtmlImportInfo& rInfo ); + /** Closes *this* table based on preformatted text (</pre> tag). + @return Pointer to the parent table. */ + ScHTMLTable* PreOff( const HtmlImportInfo& rInfo ); + + /** Starts next row (<tr> tag). + @descr Cell address is invalid until first call of DataOn(). */ + void RowOn( const HtmlImportInfo& rInfo ); + /** Closes the current row (</tr> tag). + @descr Cell address is invalid until call of RowOn() and DataOn(). */ + void RowOff( const HtmlImportInfo& rInfo ); + /** Starts the next cell (<td> or <th> tag). */ + void DataOn( const HtmlImportInfo& rInfo ); + /** Closes the current cell (</td> or </th> tag). + @descr Cell address is invalid until next call of DataOn(). */ + void DataOff( const HtmlImportInfo& rInfo ); + + /** Starts the body of the HTML document (<body> tag). */ + void BodyOn( const HtmlImportInfo& rInfo ); + /** Closes the body of the HTML document (</body> tag). */ + void BodyOff( const HtmlImportInfo& rInfo ); + + /** Closes *this* table (</table> tag) or preformatted text (</pre> tag). + @descr Used to close this table object regardless of the opening tag type. + @return Pointer to the parent table, or this, if no parent found. */ + ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo ); + + /** Returns the resulting document row/column count of the specified HTML row/column. */ + SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const; + /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd).
*/ + SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const; + /** Returns the total document row/column count in the specified direction. */ + SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const; + /** Returns the total document row/column count of the specified HTML cell. */ + ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const; + + /** Returns the resulting Calc position of the top-left corner of the table. */ + const ScHTMLPos& GetDocPos() const { return maDocBasePos; } + /** Calculates the resulting Calc position of the specified HTML column/row. */ + SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const; + /** Calculates the resulting Calc position of the specified HTML cell. */ + ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const; + + /** Calculates the current Calc document area of this table. */ + void GetDocRange( ScRange& rRange ) const; + + /** Applies border formatting to the passed document. */ + void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const; + + SvNumberFormatter* GetFormatTable(); + +protected: + /** Creates a new HTML table without parent. + @descr This constructor is used to create the "global table". */ + explicit ScHTMLTable( + SfxItemPool& rPool, + EditEngine& rEditEngine, + std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList, + ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser, + const ScDocument& rDoc ); + + /** Fills all empty cells in this and nested tables with dummy parse entries. */ + void FillEmptyCells(); + /** Recalculates the size of all columns/rows in the table, regarding nested tables. */ + void RecalcDocSize(); + /** Recalculates the position of all cell entries and nested tables. + @param rBasePos The origin of the table in the Calc document.
*/ + void RecalcDocPos( const ScHTMLPos& rBasePos ); + +private: + typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr; + typedef ::std::vector< SCCOLROW > ScSizeVec; + typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector; + typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr; + + /** Returns true, if the current cell does not contain an entry yet. */ + bool IsEmptyCell() const; + /** Returns the item set from cell, row, or table, depending on current state. */ + const SfxItemSet& GetCurrItemSet() const; + + /** Returns true, if import info represents a space character. */ + static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo ); + + /** Creates and returns a new empty flying entry at position (0,0). */ + ScHTMLEntryPtr CreateEntry() const; + /** Creates a new flying entry. + @param rInfo Contains the initial edit engine selection for the entry. */ + void CreateNewEntry( const HtmlImportInfo& rInfo ); + + /** Inserts an empty line in front of the next entry. */ + void InsertLeadingEmptyLine(); + + /** Pushes the passed entry into the passed entry vector. */ + void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry ); + /** Tries to insert the entry into the current cell. + @descr If insertion is not possible (i.e., currently no cell open), the + entry will be inserted into the parent table. + @return true = Entry has been pushed into the current cell; false = Entry dropped. */ + bool PushEntry( ScHTMLEntryPtr& rxEntry ); + /** Puts the current entry into the entry list, if it is not empty. + @param rInfo The import info struct containing the end position of the current entry. + @param bLastInCell true = If cell is still empty, put this entry always. + @return true = Entry has been pushed into the current cell; false = Entry dropped.
*/ + bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false ); + /** Pushes a new entry into current cell which references a nested table. */ + void PushTableEntry( ScHTMLTableId nTableId ); + + /** Tries to find a table from the table container. + @descr Assumes that the table is located in the current container or + that the passed table identifier is 0. + @param nTableId Unique identifier of the table or 0. */ + ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const; + /** Inserts a nested table in the current cell at the specified position. + @param bPreFormText true = New table is based on preformatted text (<pre> tag). */ + ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText ); + + /** Inserts a new cell in an unused position, starting from current cell position. */ + void InsertNewCell( const ScHTMLSize& rSpanSize ); + + /** Sets internal states for a new table row. */ + void ImplRowOn(); + /** Sets internal states for leaving a table row. */ + void ImplRowOff(); + /** Sets internal states for entering a new table cell. */ + void ImplDataOn( const ScHTMLSize& rSpanSize ); + /** Sets internal states for leaving a table cell. */ + void ImplDataOff(); + + /** Inserts additional formatting options from import info into the item set. */ + static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo ); + + /** Updates the document column/row size of the specified column or row. + @descr Only increases the present count, never decreases. */ + void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize ); + /** Calculates and sets the resulting size the cell needs in the document. + @descr Reduces the needed size in merged cells. + @param nCellPos The first column/row position of the (merged) cell. + @param nCellSpan The cell spanning in the specified orientation. + @param nRealDocSize The raw document size of all entries of the cell.
*/ + void CalcNeededDocSize( + ScHTMLOrient eOrient, SCCOLROW nCellPos, + SCCOLROW nCellSpan, SCCOLROW nRealDocSize ); + +private: + ScHTMLTable* mpParentTable; /// Pointer to parent table. + ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables. + OUString maTableName; /// Table name from <table id> option. + OUString maCaption; /// Caption name of the table from <caption> </caption> + OUStringBuffer maCaptionBuffer; /// Caption buffer of the table from <caption> </caption> + ScHTMLTableAutoId maTableId; /// Unique identifier of this table. + SfxItemSet maTableItemSet; /// Items for the entire table. + std::optional<SfxItemSet> moRowItemSet; /// Items for the current table row. + std::optional<SfxItemSet> moDataItemSet; /// Items for the current cell. + ScRangeList maHMergedCells; /// List of all horizontally merged cells. + ScRangeList maVMergedCells; /// List of all vertically merged cells. + ScRangeList maUsedCells; /// List of all used cells. + EditEngine& mrEditEngine; /// Edit engine (from ScEEParser). + std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList; /// List that owns the parse entries (from ScEEParser). + std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap; /// List of entries for each cell. + ScHTMLEntryVector* mpCurrEntryVector; /// Current entry vector from map for faster access. + ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list. + ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row. + ScHTMLSize maSize; /// Size of the table. + ScHTMLPos maCurrCell; /// Address of current cell to fill. + ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document. + ScHTMLParser* mpParser; /// NOTE(review): presumably the parser handed to the constructor - confirm in the implementation. + const ScDocument& mrDoc; /// NOTE(review): presumably the rDoc passed to the constructor - confirm in the implementation. + bool mbBorderOn:1; /// true = Table borders on. + bool mbPreFormText:1; /// true = Table from preformatted text (<pre> tag). + bool mbRowOn:1; /// true = Inside of <tr> </tr>. + bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
+ bool mbPushEmptyLine:1; /// true = Insert empty line before current entry. + bool mbCaptionOn:1; /// true = Inside of <caption> </caption>. +}; + +/** The "global table" representing the entire HTML document. */ +class ScHTMLGlobalTable : public ScHTMLTable +{ +public: + explicit ScHTMLGlobalTable( + SfxItemPool& rPool, + EditEngine& rEditEngine, + std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList, + ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser, + const ScDocument& rDoc ); + + virtual ~ScHTMLGlobalTable() override; + + /** Recalculates sizes and resulting positions of all document entries. */ + void Recalc(); +}; + +/** The HTML parser for data queries. Focuses on data import, not on layout. + + Builds the table structure correctly, ignores extended formatting like + pictures or column widths. + */ +class ScHTMLQueryParser : public ScHTMLParser +{ +public: + explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc ); + virtual ~ScHTMLQueryParser() override; + + virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override; + + /** Returns the "global table" which contains the entire HTML document. */ + virtual const ScHTMLTable* GetGlobalTable() const override; + +private: + /** Handles all possible tags in the HTML document. */ + void ProcessToken( const HtmlImportInfo& rInfo ); + /** Inserts a text portion into current entry. */ + void InsertText( const HtmlImportInfo& rInfo ); + /** Processes the <font> tag. */ + void FontOn( const HtmlImportInfo& rInfo ); + + /** Processes the <meta> tag. */ + void MetaOn( const HtmlImportInfo& rInfo ); + /** Opens the title of the HTML document (<title> tag). */ + void TitleOn(); + /** Closes the title of the HTML document (</title> tag). */ + void TitleOff( const HtmlImportInfo& rInfo ); + + /** Opens a new table at the current position. */ + void TableOn( const HtmlImportInfo& rInfo ); + /** Closes the current table.
*/ + void TableOff( const HtmlImportInfo& rInfo ); + /** Opens a new table based on preformatted text. */ + void PreOn( const HtmlImportInfo& rInfo ); + /** Closes the current preformatted text table. */ + void PreOff( const HtmlImportInfo& rInfo ); + + /** Closes the current table, regardless of the opening tag. */ + void CloseTable( const HtmlImportInfo& rInfo ); + + void ParseStyle(std::u16string_view rStrm); + + DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void ); + +private: + typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr; + + OUStringBuffer maTitle; /// The title of the document. + ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document. + ScHTMLTable* mpCurrTable; /// Pointer to current table (performance). + ScHTMLTableId mnUnusedId; /// First unused table identifier. + bool mbTitleOn; /// true = Inside of <title> </title>. +}; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |