summaryrefslogtreecommitdiffstats
path: root/sc/source/filter/inc/htmlpars.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'sc/source/filter/inc/htmlpars.hxx')
-rw-r--r--sc/source/filter/inc/htmlpars.hxx630
1 files changed, 630 insertions, 0 deletions
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx
new file mode 100644
index 000000000..47ecc57b4
--- /dev/null
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -0,0 +1,630 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#pragma once
+
+#include <memory>
+#include <map>
+#include <optional>
+#include <stack>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+#include <o3tl/sorted_vector.hxx>
+
+#include <rangelst.hxx>
+#include "eeparser.hxx"
+
+const sal_uInt32 SC_HTML_FONTSIZES = 7; // number of HTML font sizes (like export, HTML options); dimensions ScHTMLParser::maFontHeights
+
+// Pixel tolerances for SeekOffset and related helpers.
+const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
+const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested tables
+
+// BASE class for HTML parser classes
+
+class ScHTMLTable;
+
+/**
+ * Container for the HTML style properties that were parsed from the
+ * content of <style> elements.
+ */
+class ScHTMLStyles
+{
+    using PropsType = std::unordered_map<OUString, OUString>;
+    using NamePropsType = std::map<OUString, PropsType>;
+    using ElemsType = std::map<OUString, NamePropsType>;
+
+    /// Global properties: given for a class, valid for all elements.
+    NamePropsType m_GlobalProps;
+    /// Element global properties: given for an element without a class.
+    NamePropsType m_ElemGlobalProps;
+    /// Element -> class -> properties (both element and class are given).
+    ElemsType m_ElemProps;
+    /// A persistent empty string.
+    const OUString maEmpty;
+
+public:
+    ScHTMLStyles();
+
+    /**
+     * Adds one property/value pair. Element name and class name are each
+     * passed as a character pointer together with a length.
+     */
+    void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
+             const OUString& aProp, const OUString& aValue);
+
+    /**
+     * Looks up the best-matching value of property rPropName for the given
+     * element and class names.
+     */
+    const OUString& getPropertyValue(
+        const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
+
+private:
+    /** Static helper operating on one NamePropsType container. */
+    static void insertProp(
+        NamePropsType& rProps, const OUString& aName,
+        const OUString& aProp, const OUString& aValue);
+};
+
+/** Common base class of the HTML parser classes; derives from ScEEParser. */
+class ScHTMLParser : public ScEEParser
+{
+    /// Style data, accessible through GetStyles().
+    ScHTMLStyles maStyles;
+
+protected:
+    /// Font heights; the array holds SC_HTML_FONTSIZES entries.
+    sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ];
+    /// The destination document.
+    ScDocument* mpDoc;
+
+public:
+    explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
+    virtual ~ScHTMLParser() override;
+
+    /** Pure virtual here; every concrete parser has to provide it. */
+    virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0;
+
+    /** Gives read/write access to the style data of this parser. */
+    ScHTMLStyles& GetStyles()
+    {
+        return maStyles;
+    }
+
+    /** Dereferences mpDoc; no null check is performed here. */
+    ScDocument& GetDoc()
+    {
+        return *mpDoc;
+    }
+
+    /** Returns the "global table" which contains the entire HTML document. */
+    virtual const ScHTMLTable* GetGlobalTable() const = 0;
+};
+
+typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset; // sorted vector of sal_uLong values; ScHTMLLayoutParser keeps its column offsets in this type
+
+/** Plain bundle of per-table state; ScHTMLLayoutParser keeps such entries
+    on its aTableStack. The constructor copies every argument into the
+    member of the matching position. */
+struct ScHTMLTableStackEntry
+{
+    ScRangeListRef xLockedList;
+    std::shared_ptr<ScEEParseEntry> xCellEntry;
+    ScHTMLColOffset* pLocalColOffset;
+    sal_uLong nFirstTableCell;
+    SCROW nRowCnt;
+    SCCOL nColCntStart;
+    SCCOL nMaxCol;
+    sal_uInt16 nTable;
+    sal_uInt16 nTableWidth;
+    sal_uInt16 nColOffset;
+    sal_uInt16 nColOffsetStart;
+    bool bFirstRow;
+
+    ScHTMLTableStackEntry(
+            const std::shared_ptr<ScEEParseEntry>& rEntry,
+            const ScRangeListRef& rLocked,
+            ScHTMLColOffset* pOffsets,
+            sal_uLong nFirstCell,
+            SCROW nRows,
+            SCCOL nColStart,
+            SCCOL nColMax,
+            sal_uInt16 nTableNo,
+            sal_uInt16 nWidth,
+            sal_uInt16 nOffset,
+            sal_uInt16 nOffsetStart,
+            bool bInFirstRow )
+        : xLockedList( rLocked )
+        , xCellEntry( rEntry )
+        , pLocalColOffset( pOffsets )
+        , nFirstTableCell( nFirstCell )
+        , nRowCnt( nRows )
+        , nColCntStart( nColStart )
+        , nMaxCol( nColMax )
+        , nTable( nTableNo )
+        , nTableWidth( nWidth )
+        , nColOffset( nOffset )
+        , nColOffsetStart( nOffsetStart )
+        , bFirstRow( bInFirstRow )
+    {
+    }
+};
+
+/** Plain record of three positions: last column, next row and current row. */
+struct ScHTMLAdjustStackEntry
+{
+    SCCOL nLastCol;
+    SCROW nNextRow;
+    SCROW nCurRow;
+
+    ScHTMLAdjustStackEntry( SCCOL nColLast, SCROW nRowNext, SCROW nRowCurrent )
+        : nLastCol( nColLast )
+        , nNextRow( nRowNext )
+        , nCurRow( nRowCurrent )
+    {
+    }
+};
+
+class EditEngine;
+class ScDocument;
+class HTMLOption;
+
+// TODO these need better names
+using InnerMap = ::std::map<SCROW, SCROW>; // maps a SCROW to a SCROW
+using OuterMap = ::std::map<sal_uInt16, InnerMap*>; // maps a sal_uInt16 to a raw InnerMap pointer
+
+class ScHTMLLayoutParser : public ScHTMLParser // concrete ScHTMLParser: overrides Read() and GetGlobalTable()
+{
+private:
+ Size aPageSize;
+ OUString aBaseURL;
+ ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> >
+ aTableStack; // stack of owned ScHTMLTableStackEntry objects (saved per-table state)
+ OUString aString;
+ ScRangeListRef xLockedList; // per table
+ std::unique_ptr<OuterMap> pTables; // owned OuterMap: sal_uInt16 -> InnerMap* (raw pointers; who deletes them is not visible in this header)
+ ScHTMLColOffset maColOffset;
+ ScHTMLColOffset* pLocalColOffset; // per table
+ sal_uLong nFirstTableCell; // per table
+ short nTableLevel;
+ sal_uInt16 nTable;
+ sal_uInt16 nMaxTable;
+ SCCOL nColCntStart; // first Col per table
+ SCCOL nMaxCol; // per table
+ sal_uInt16 nTableWidth; // per table
+ sal_uInt16 nColOffset; // current, pixel
+ sal_uInt16 nColOffsetStart; // start value per table, in pixel
+ sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related; NOTE(review): presumably one of SC_HTML_OFFSET_TOLERANCE_SMALL/LARGE - confirm
+ bool bFirstRow; // per table, whether in first row
+ bool bTabInTabCell:1; // 1-bit bit-field flag
+ bool bInCell:1; // 1-bit bit-field flag
+ bool bInTitle:1; // 1-bit bit-field flag
+
+ DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
+ void NewActEntry( const ScEEParseEntry* );
+ static void EntryEnd( ScEEParseEntry*, const ESelection& );
+ void ProcToken( HtmlImportInfo* );
+ void CloseEntry( const HtmlImportInfo* );
+ void NextRow( const HtmlImportInfo* );
+ void SkipLocked( ScEEParseEntry*, bool bJoin = true );
+ static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset,
+ SCCOL* pCol, sal_uInt16 nOffsetTol ); // pCol is a pointer parameter, presumably an output - confirm in the implementation
+ static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
+ sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
+ sal_uInt16 nWidthTol ); // nOffset and nWidth are non-const references, so the callee may modify them
+ static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
+ sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
+ sal_uInt16 nWidthTol ); // same parameter list as MakeCol, but nOffset and nWidth are taken by value
+ static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
+ sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
+ void Colonize( ScEEParseEntry* );
+ sal_uInt16 GetWidth( const ScEEParseEntry* );
+ void SetWidths();
+ void Adjust();
+
+ sal_uInt16 GetWidthPixel( const HTMLOption& );
+ bool IsAtBeginningOfText( const HtmlImportInfo* );
+
+ void TableOn( HtmlImportInfo* );
+ void ColOn( HtmlImportInfo* );
+ void TableRowOn( const HtmlImportInfo* );
+ void TableRowOff( const HtmlImportInfo* );
+ void TableDataOn( HtmlImportInfo* );
+ void TableDataOff( const HtmlImportInfo* );
+ void TableOff( const HtmlImportInfo* );
+ void Image( HtmlImportInfo* );
+ void AnchorOn( HtmlImportInfo* );
+ void FontOn( HtmlImportInfo* );
+
+public:
+ ScHTMLLayoutParser( EditEngine*, const OUString& rBaseURL, const Size& aPageSize, ScDocument* );
+ virtual ~ScHTMLLayoutParser() override;
+ virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override; // implements the pure virtual ScHTMLParser::Read
+ virtual const ScHTMLTable* GetGlobalTable() const override; // implements the pure virtual ScHTMLParser::GetGlobalTable
+};
+
+// HTML DATA QUERY PARSER
+
+/** Declares the orientation in or for a table: column (tdCol = 0) or row (tdRow = 1). */
+enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
+
+/** Type for a unique identifier for each table (16-bit unsigned). */
+typedef sal_uInt16 ScHTMLTableId;
+/** Identifier of the "global table" (the entire HTML document). */
+const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
+/** Used as table index for normal (non-table) entries in ScHTMLEntry structs; note that it has the same value (0) as SC_HTML_GLOBAL_TABLE. */
+const ScHTMLTableId SC_HTML_NO_TABLE = 0;
+
+/** Column/row pair addressing a single cell of an HTML table. */
+struct ScHTMLPos
+{
+    SCCOL mnCol;
+    SCROW mnRow;
+
+    /** Creates the position (0,0). */
+    explicit ScHTMLPos()
+        : mnCol( 0 )
+        , mnRow( 0 )
+    {
+    }
+
+    /** Creates the position from an explicit column and row. */
+    explicit ScHTMLPos( SCCOL nCol, SCROW nRow )
+        : mnCol( nCol )
+        , mnRow( nRow )
+    {
+    }
+
+    /** Creates the position from column and row of the passed address. */
+    explicit ScHTMLPos( const ScAddress& rAddr )
+    {
+        Set( rAddr );
+    }
+
+    /** Returns the column for tdCol, otherwise the row. */
+    SCCOLROW Get( ScHTMLOrient eOrient ) const
+    {
+        if( eOrient == tdCol )
+            return mnCol;
+        return mnRow;
+    }
+
+    /** Overwrites both coordinates. */
+    void Set( SCCOL nCol, SCROW nRow )
+    {
+        mnCol = nCol;
+        mnRow = nRow;
+    }
+
+    /** Overwrites both coordinates with column and row of the address. */
+    void Set( const ScAddress& rAddr )
+    {
+        Set( rAddr.Col(), rAddr.Row() );
+    }
+
+    /** Builds an ScAddress from column and row, passing 0 as third argument. */
+    ScAddress MakeAddr() const
+    {
+        return ScAddress( mnCol, mnRow, 0 );
+    }
+};
+
+/** Orders positions by row first; positions in the same row are ordered by column. */
+inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
+{
+    if( rPos1.mnRow != rPos2.mnRow )
+        return rPos1.mnRow < rPos2.mnRow;
+    return rPos1.mnCol < rPos2.mnCol;
+}
+
+/** Column count / row count pair describing a cell size in an HTML table. */
+struct ScHTMLSize
+{
+    SCCOL mnCols;
+    SCROW mnRows;
+
+    /** Creates the size from explicit column and row counts. */
+    explicit ScHTMLSize( SCCOL nCols, SCROW nRows )
+        : mnCols( nCols )
+        , mnRows( nRows )
+    {
+    }
+
+    /** Overwrites both counts. */
+    void Set( SCCOL nCols, SCROW nRows )
+    {
+        mnCols = nCols;
+        mnRows = nRows;
+    }
+};
+
+/** One parse entry: either a line of text or a reference to a table. */
+struct ScHTMLEntry : public ScEEParseEntry
+{
+public:
+    explicit ScHTMLEntry(
+        const SfxItemSet& rItemSet,
+        ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
+
+    /** Returns true, if the selection of the entry has no range. */
+    bool IsEmpty() const
+    {
+        return !aSel.HasRange();
+    }
+
+    /** Returns true, if the entry has any content to be imported. */
+    bool HasContents() const;
+
+    /** Returns true, if the entry represents a table, i.e. its table index
+        differs from SC_HTML_NO_TABLE. */
+    bool IsTable() const
+    {
+        return nTab != SC_HTML_NO_TABLE;
+    }
+
+    /** Returns the table index (nTab) stored in this entry. */
+    ScHTMLTableId GetTableId() const
+    {
+        return nTab;
+    }
+
+    /** Sets the import always state (there is no way to clear it here). */
+    void SetImportAlways()
+    {
+        mbImportAlways = true;
+    }
+
+    /** Sets start point of the entry selection to the start of the import info object. */
+    void AdjustStart( const HtmlImportInfo& rInfo );
+    /** Sets end point of the entry selection to the end of the import info object. */
+    void AdjustEnd( const HtmlImportInfo& rInfo );
+    /** Deletes leading and trailing empty paragraphs from the entry. */
+    void Strip( const EditEngine& rEditEngine );
+
+    /** Gives read/write access to the item set of this entry. */
+    SfxItemSet& GetItemSet()
+    {
+        return aItemSet;
+    }
+
+    /** Gives read-only access to the item set of this entry. */
+    const SfxItemSet& GetItemSet() const
+    {
+        return aItemSet;
+    }
+
+private:
+    /// true = Always import this entry.
+    bool mbImportAlways;
+};
+
+/** Helper struct that takes care of creating unique table identifiers. */
+struct ScHTMLTableAutoId
+{
+    /// The unique table identifier that was created; constant afterwards.
+    const ScHTMLTableId mnTableId;
+    /// Refers to the global variable holding the unused identifier.
+    ScHTMLTableId& mrnUnusedId;
+
+    /** Assigns an unused identifier to the member mnTableId. */
+    explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
+};
+
+class ScHTMLTableMap;
+
+/** Stores data for one table in an HTML document.
+
+ This class does the main work for importing an HTML document. It manages
+ the correct insertion of parse entries into the correct cells and the
+ creation of nested tables. Recalculation of resulting document size and
+ position is done recursively in all nested tables.
+ */
+class ScHTMLTable
+{
+public:
+ /** Creates a new HTML table without content.
+ @descr Internally handles a current cell position. This position is
+ invalid until first calls of RowOn() and DataOn().
+ @param rParentTable Reference to the parent table that owns this table.
+ @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
+ explicit ScHTMLTable(
+ ScHTMLTable& rParentTable,
+ const HtmlImportInfo& rInfo,
+ bool bPreFormText,
+ const ScDocument& rDoc );
+
+ virtual ~ScHTMLTable();
+
+ /** Returns the name of the table, specified in the TABLE tag. */
+ const OUString& GetTableName() const { return maTableName; }
+ /** Returns the caption of the table, specified in the <caption> tag. */
+ const OUString& GetTableCaption() const { return maCaption; }
+ /** Returns the unique identifier of the table. */
+ ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
+ /** Returns the cell spanning of the specified cell. */
+ ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
+
+ /** Searches in all nested tables for the specified table.
+ @param nTableId Unique identifier of the table. */
+ ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
+
+ /** Puts the item into the item set of the current entry. */
+ void PutItem( const SfxPoolItem& rItem );
+ /** Inserts a text portion into current entry. */
+ void PutText( const HtmlImportInfo& rInfo );
+ /** Inserts a new line, if in preformatted text, else does nothing. */
+ void InsertPara( const HtmlImportInfo& rInfo );
+
+ /** Inserts a line break (<br> tag).
+ @descr Inserts the current entry regardless if it is empty. */
+ void BreakOn();
+ /** Inserts a heading line (<p> and <h*> tags). */
+ void HeadingOn();
+ /** Processes a hyperlink (<a> tag). */
+ void AnchorOn();
+
+ /** Starts a *new* table nested in this table (<table> tag).
+ @return Pointer to the new table. */
+ ScHTMLTable* TableOn( const HtmlImportInfo& rInfo );
+ /** Closes *this* table (</table> tag).
+ @return Pointer to the parent table. */
+ ScHTMLTable* TableOff( const HtmlImportInfo& rInfo );
+ /** Processes the caption of the table (<caption> tag). */
+ void CaptionOn();
+ /** Processes the caption of the table (</caption> tag). */
+ void CaptionOff();
+ /** Starts a *new* table based on preformatted text (<pre> tag).
+ @return Pointer to the new table. */
+ ScHTMLTable* PreOn( const HtmlImportInfo& rInfo );
+ /** Closes *this* table based on preformatted text (</pre> tag).
+ @return Pointer to the parent table. */
+ ScHTMLTable* PreOff( const HtmlImportInfo& rInfo );
+
+ /** Starts next row (<tr> tag).
+ @descr Cell address is invalid until first call of DataOn(). */
+ void RowOn( const HtmlImportInfo& rInfo );
+ /** Closes the current row (</tr> tag).
+ @descr Cell address is invalid until call of RowOn() and DataOn(). */
+ void RowOff( const HtmlImportInfo& rInfo );
+ /** Starts the next cell (<td> or <th> tag). */
+ void DataOn( const HtmlImportInfo& rInfo );
+ /** Closes the current cell (</td> or </th> tag).
+ @descr Cell address is invalid until next call of DataOn(). */
+ void DataOff( const HtmlImportInfo& rInfo );
+
+ /** Starts the body of the HTML document (<body> tag). */
+ void BodyOn( const HtmlImportInfo& rInfo );
+ /** Closes the body of the HTML document (</body> tag). */
+ void BodyOff( const HtmlImportInfo& rInfo );
+
+ /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
+ @descr Used to close this table object regardless of the opening tag type.
+ @return Pointer to the parent table, or this, if no parent found. */
+ ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo );
+
+ /** Returns the resulting document row/column count of the specified HTML row/column. */
+ SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
+ /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
+ SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
+ /** Returns the total document row/column count in the specified direction. */
+ SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
+ /** Returns the total document row/column count of the specified HTML cell. */
+ ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
+
+ /** Returns the resulting Calc position of the top left edge of the table. */
+ const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
+ /** Calculates the resulting Calc position of the specified HTML column/row. */
+ SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
+ /** Calculates the resulting Calc position of the specified HTML cell. */
+ ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
+
+ /** Calculates the current Calc document area of this table. */
+ void GetDocRange( ScRange& rRange ) const;
+
+ /** Applies border formatting to the passed document. */
+ void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
+
+ SvNumberFormatter* GetFormatTable(); /// NOTE(review): undocumented; which formatter is returned is not visible in this header - confirm in the implementation.
+
+protected:
+ /** Creates a new HTML table without parent.
+ @descr This constructor is used to create the "global table". */
+ explicit ScHTMLTable(
+ SfxItemPool& rPool,
+ EditEngine& rEditEngine,
+ std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
+ ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
+ const ScDocument& rDoc );
+
+ /** Fills all empty cells in this and nested tables with dummy parse entries. */
+ void FillEmptyCells();
+ /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
+ void RecalcDocSize();
+ /** Recalculates the position of all cell entries and nested tables.
+ @param rBasePos The origin of the table in the Calc document. */
+ void RecalcDocPos( const ScHTMLPos& rBasePos );
+
+private:
+ typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
+ typedef ::std::vector< SCCOLROW > ScSizeVec;
+ typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector;
+ typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
+
+ /** Returns true, if the current cell does not contain an entry yet. */
+ bool IsEmptyCell() const;
+ /** Returns the item set from cell, row, or table, depending on current state. */
+ const SfxItemSet& GetCurrItemSet() const;
+
+ /** Returns true, if import info represents a space character. */
+ static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo );
+
+ /** Creates and returns a new empty flying entry at position (0,0). */
+ ScHTMLEntryPtr CreateEntry() const;
+ /** Creates a new flying entry.
+ @param rInfo Contains the initial edit engine selection for the entry. */
+ void CreateNewEntry( const HtmlImportInfo& rInfo );
+
+ /** Inserts an empty line in front of the next entry. */
+ void InsertLeadingEmptyLine();
+
+ /** Pushes the passed entry into the list of the current cell. */
+ void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry );
+ /** Tries to insert the entry into the current cell.
+ @descr If insertion is not possible (i.e., currently no cell open), the
+ entry will be inserted into the parent table.
+ @return true = Entry has been pushed into the current cell; false = Entry dropped. */
+ bool PushEntry( ScHTMLEntryPtr& rxEntry );
+ /** Puts the current entry into the entry list, if it is not empty.
+ @param rInfo The import info struct containing the end position of the current entry.
+ @param bLastInCell true = If cell is still empty, put this entry always.
+ @return true = Entry has been pushed into the current cell; false = Entry dropped. */
+ bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false );
+ /** Pushes a new entry into current cell which references a nested table.*/
+ void PushTableEntry( ScHTMLTableId nTableId );
+
+ /** Tries to find a table from the table container.
+ @descr Assumes that the table is located in the current container or
+ that the passed table identifier is 0.
+ @param nTableId Unique identifier of the table or 0. */
+ ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
+ /** Inserts a nested table in the current cell at the specified position.
+ @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
+ ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText );
+
+ /** Inserts a new cell in an unused position, starting from current cell position. */
+ void InsertNewCell( const ScHTMLSize& rSpanSize );
+
+ /** Set internal states for a new table row. */
+ void ImplRowOn();
+ /** Set internal states for leaving a table row. */
+ void ImplRowOff();
+ /** Set internal states for entering a new table cell. */
+ void ImplDataOn( const ScHTMLSize& rSpanSize );
+ /** Set internal states for leaving a table cell. */
+ void ImplDataOff();
+
+ /** Inserts additional formatting options from import info into the item set. */
+ static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo );
+
+ /** Updates the document column/row size of the specified column or row.
+ @descr Only increases the present count, never decreases. */
+ void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
+ /** Calculates and sets the resulting size the cell needs in the document.
+ @descr Reduces the needed size in merged cells.
+ @param nCellPos The first column/row position of the (merged) cell.
+ @param nCellSpan The cell spanning in the specified orientation.
+ @param nRealDocSize The raw document size of all entries of the cell. */
+ void CalcNeededDocSize(
+ ScHTMLOrient eOrient, SCCOLROW nCellPos,
+ SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
+
+private:
+ ScHTMLTable* mpParentTable; /// Pointer to parent table.
+ ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
+ OUString maTableName; /// Table name from <table id> option.
+ OUString maCaption; /// Caption name of the table from <caption> </caption>
+ OUStringBuffer maCaptionBuffer; /// Caption buffer of the table from <caption> </caption>
+ ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
+ SfxItemSet maTableItemSet; /// Items for the entire table.
+ std::optional<SfxItemSet> moRowItemSet; /// Items for the current table row.
+ std::optional<SfxItemSet> moDataItemSet; /// Items for the current cell.
+ ScRangeList maHMergedCells; /// List of all horizontally merged cells.
+ ScRangeList maVMergedCells; /// List of all vertically merged cells.
+ ScRangeList maUsedCells; /// List of all used cells.
+ EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
+ std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
+ std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap; /// List of entries for each cell.
+ ScHTMLEntryVector* mpCurrEntryVector; /// Current entry vector from map for faster access.
+ ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
+ ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
+ ScHTMLSize maSize; /// Size of the table.
+ ScHTMLPos maCurrCell; /// Address of current cell to fill.
+ ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
+ ScHTMLParser* mpParser; /// NOTE(review): presumably the parser this table belongs to (cf. pParser constructor argument) - confirm.
+ const ScDocument& mrDoc; /// NOTE(review): presumably the document passed as rDoc to the constructors - confirm.
+ bool mbBorderOn:1; /// true = Table borders on.
+ bool mbPreFormText:1; /// true = Table from preformatted text (<pre> tag).
+ bool mbRowOn:1; /// true = Inside of <tr> </tr>.
+ bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
+ bool mbPushEmptyLine:1; /// true = Insert empty line before current entry.
+ bool mbCaptionOn:1; /// true = Inside of <caption> </caption>
+};
+
+/** The table that stands for the whole HTML document (the "global table"). */
+class ScHTMLGlobalTable : public ScHTMLTable
+{
+public:
+    /** Takes the same argument list as the protected, parent-less
+        constructor of ScHTMLTable. */
+    explicit ScHTMLGlobalTable(
+            SfxItemPool& rPool,
+            EditEngine& rEditEngine,
+            std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
+            ScHTMLTableId& rnUnusedId,
+            ScHTMLParser* pParser,
+            const ScDocument& rDoc );
+
+    virtual ~ScHTMLGlobalTable() override;
+
+    /** Recalculates the sizes and the resulting positions of all document entries. */
+    void Recalc();
+};
+
+/** HTML parser used for data queries. It concentrates on importing the
+    data, not on reproducing the layout.
+
+    The table structure is built correctly, whereas extended formatting like
+    pictures or column widths is ignored.
+ */
+class ScHTMLQueryParser : public ScHTMLParser
+{
+public:
+    explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
+    virtual ~ScHTMLQueryParser() override;
+
+    virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override;
+
+    /** Returns the "global table" which contains the entire HTML document. */
+    virtual const ScHTMLTable* GetGlobalTable() const override;
+
+private:
+    /** Handles all possible tags in the HTML document. */
+    void ProcessToken( const HtmlImportInfo& rInfo );
+    /** Inserts a text portion into the current entry. */
+    void InsertText( const HtmlImportInfo& rInfo );
+    /** Processes the <font> tag. */
+    void FontOn( const HtmlImportInfo& rInfo );
+
+    /** Processes the <meta> tag. */
+    void MetaOn( const HtmlImportInfo& rInfo );
+    /** Opens the title of the HTML document (<title> tag). */
+    void TitleOn();
+    /** Closes the title of the HTML document (</title> tag). */
+    void TitleOff( const HtmlImportInfo& rInfo );
+
+    /** Opens a new table at the current position. */
+    void TableOn( const HtmlImportInfo& rInfo );
+    /** Closes the current table. */
+    void TableOff( const HtmlImportInfo& rInfo );
+    /** Opens a new table based on preformatted text. */
+    void PreOn( const HtmlImportInfo& rInfo );
+    /** Closes the current preformatted text table. */
+    void PreOff( const HtmlImportInfo& rInfo );
+
+    /** Closes the current table, regardless of the opening tag. */
+    void CloseTable( const HtmlImportInfo& rInfo );
+
+    /** Takes the text to parse as a UTF-16 string view. */
+    void ParseStyle(std::u16string_view rStrm);
+
+    DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
+
+private:
+    using ScHTMLGlobalTablePtr = ::std::unique_ptr< ScHTMLGlobalTable >;
+
+    /// The title of the document.
+    OUStringBuffer maTitle;
+    /// Contains the entire imported document.
+    ScHTMLGlobalTablePtr mxGlobTable;
+    /// Pointer to current table (performance).
+    ScHTMLTable* mpCurrTable;
+    /// First unused table identifier.
+    ScHTMLTableId mnUnusedId;
+    /// true = Inside of <title> </title>.
+    bool mbTitleOn;
+};
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */