diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /helpcompiler | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'helpcompiler')
-rw-r--r-- | helpcompiler/Executable_HelpIndexer.mk | 27 | ||||
-rw-r--r-- | helpcompiler/Executable_HelpLinker.mk | 34 | ||||
-rw-r--r-- | helpcompiler/IwyuFilter_helpcompiler.yaml | 20 | ||||
-rw-r--r-- | helpcompiler/Library_helplinker.mk | 53 | ||||
-rw-r--r-- | helpcompiler/Makefile | 14 | ||||
-rw-r--r-- | helpcompiler/Module_helpcompiler.mk | 23 | ||||
-rw-r--r-- | helpcompiler/inc/BasCodeTagger.hxx | 57 | ||||
-rw-r--r-- | helpcompiler/inc/HelpCompiler.hxx | 213 | ||||
-rw-r--r-- | helpcompiler/inc/HelpLinker.hxx | 89 | ||||
-rw-r--r-- | helpcompiler/source/BasCodeTagger.cxx | 230 | ||||
-rw-r--r-- | helpcompiler/source/HelpCompiler.cxx | 508 | ||||
-rw-r--r-- | helpcompiler/source/HelpIndexer.cxx | 210 | ||||
-rw-r--r-- | helpcompiler/source/HelpIndexer_main.cxx | 92 | ||||
-rw-r--r-- | helpcompiler/source/HelpLinker.cxx | 942 | ||||
-rw-r--r-- | helpcompiler/source/HelpLinker_main.cxx | 46 | ||||
-rw-r--r-- | helpcompiler/source/HelpSearch.cxx | 55 | ||||
-rw-r--r-- | helpcompiler/source/LuceneHelper.cxx | 39 | ||||
-rw-r--r-- | helpcompiler/source/LuceneHelper.hxx | 27 |
18 files changed, 2679 insertions, 0 deletions
diff --git a/helpcompiler/Executable_HelpIndexer.mk b/helpcompiler/Executable_HelpIndexer.mk new file mode 100644 index 0000000000..9b2a7657e9 --- /dev/null +++ b/helpcompiler/Executable_HelpIndexer.mk @@ -0,0 +1,27 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Executable_Executable,HelpIndexer)) + +$(eval $(call gb_Executable_use_libraries,HelpIndexer,\ + sal \ + helplinker \ +)) + +$(eval $(call gb_Executable_use_externals,HelpIndexer,\ + expat \ + libxslt \ + clucene \ +)) + +$(eval $(call gb_Executable_add_exception_objects,HelpIndexer,\ + helpcompiler/source/HelpIndexer_main \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/helpcompiler/Executable_HelpLinker.mk b/helpcompiler/Executable_HelpLinker.mk new file mode 100644 index 0000000000..2825aadf3b --- /dev/null +++ b/helpcompiler/Executable_HelpLinker.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Executable_Executable,HelpLinker)) + +$(eval $(call gb_Executable_set_include,HelpLinker,\ + -I$(SRCDIR)/helpcompiler/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,HelpLinker,\ + sal \ + helplinker \ +)) + +$(eval $(call gb_Executable_use_externals,HelpLinker,\ + boost_headers \ + expat \ + libxslt \ + libxml2 \ + clucene \ +)) + +$(eval $(call gb_Executable_add_exception_objects,HelpLinker,\ + helpcompiler/source/HelpLinker_main \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/helpcompiler/IwyuFilter_helpcompiler.yaml b/helpcompiler/IwyuFilter_helpcompiler.yaml new file mode 100644 index 0000000000..ddb88e6aa3 --- /dev/null +++ b/helpcompiler/IwyuFilter_helpcompiler.yaml @@ -0,0 +1,20 @@ +--- +assumeFilename: helpcompiler/source/HelpCompiler.cxx +excludelist: + helpcompiler/inc/HelpCompiler.hxx: + # Needed on WIN32 + - o3tl/char16_t2wchar_t.hxx + helpcompiler/source/LuceneHelper.hxx: + # Better to keep the generic header, MSVC problem + - CLucene.h + helpcompiler/source/HelpIndexer.cxx: + # Better to keep the generic, details are order-sensitive + - CLucene.h + helpcompiler/source/HelpSearch.cxx: + # Better to keep the generic, details are order-sensitive + - CLucene.h + # Don't propose hxx -> h change in URE libs + - osl/thread.hxx + helpcompiler/source/HelpLinker.cxx: + # Needed on WIN32 + - o3tl/char16_t2wchar_t.hxx diff --git a/helpcompiler/Library_helplinker.mk b/helpcompiler/Library_helplinker.mk new file mode 100644 index 0000000000..cd45eb0202 --- /dev/null +++ b/helpcompiler/Library_helplinker.mk @@ -0,0 +1,53 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,helplinker)) + +$(eval $(call gb_Library_set_include,helplinker,\ + -I$(SRCDIR)/helpcompiler/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Library_add_defs,helplinker,\ + -DL10N_DLLIMPLEMENTATION \ + -DHELPLINKER_DLLIMPLEMENTATION \ +)) + +$(eval $(call gb_Library_use_libraries,helplinker,\ + sal \ + comphelper \ +)) + +$(eval $(call gb_Library_use_internal_api,helplinker,\ + udkapi \ + offapi \ +)) + + +$(eval $(call gb_Library_use_externals,helplinker,\ + boost_headers \ + expat \ + libxslt \ + libxml2 \ + clucene \ +)) + +$(eval $(call gb_Library_add_exception_objects,helplinker,\ + helpcompiler/source/HelpCompiler \ + helpcompiler/source/LuceneHelper \ + helpcompiler/source/HelpIndexer \ + helpcompiler/source/HelpSearch \ + helpcompiler/source/BasCodeTagger \ +)) + +$(eval $(call gb_Library_add_exception_objects,helplinker,\ + helpcompiler/source/HelpLinker \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/helpcompiler/Makefile b/helpcompiler/Makefile new file mode 100644 index 0000000000..0997e62848 --- /dev/null +++ b/helpcompiler/Makefile @@ -0,0 +1,14 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/helpcompiler/Module_helpcompiler.mk b/helpcompiler/Module_helpcompiler.mk new file mode 100644 index 0000000000..344faba3a1 --- /dev/null +++ b/helpcompiler/Module_helpcompiler.mk @@ -0,0 +1,23 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. 
+# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Module_Module,helpcompiler)) + +$(eval $(call gb_Module_add_targets,helpcompiler,\ + $(call gb_Helper_optionals_or,HELPTOOLS XMLHELP,Library_helplinker) \ +)) + +$(eval $(call gb_Module_add_targets_for_build,helpcompiler,\ + $(call gb_Helper_optional,HELPTOOLS, \ + Executable_HelpIndexer \ + Executable_HelpLinker \ + ) \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/helpcompiler/inc/BasCodeTagger.hxx b/helpcompiler/inc/BasCodeTagger.hxx new file mode 100644 index 0000000000..bf72ce0028 --- /dev/null +++ b/helpcompiler/inc/BasCodeTagger.hxx @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include <deque> +#include <memory> +#include <vector> +#include <libxml/parser.h> +#include <comphelper/syntaxhighlight.hxx> + +class LibXmlTreeWalker; + +//!Tagger class. 
+class BasicCodeTagger +{ + private: + xmlDocPtr m_pDocument; + std::vector<xmlNodePtr> m_BasicCodeContainerTags; + std::unique_ptr<LibXmlTreeWalker> m_pXmlTreeWalker; + SyntaxHighlighter m_Highlighter; + bool m_bTaggingCompleted; + void tagParagraph( xmlNodePtr paragraph ); + static xmlChar* getTypeString( TokenType tokenType ); + void getBasicCodeContainerNodes(); + void tagBasCodeParagraphs(); + + public: + enum TaggerException { NULL_DOCUMENT, EMPTY_DOCUMENT }; + BasicCodeTagger( xmlDocPtr rootDoc ); + ~BasicCodeTagger(); + void tagBasicCodes(); +}; + +//================LibXmlTreeWalker=========================================================== + +class LibXmlTreeWalker +{ + private: + xmlNodePtr m_pCurrentNode; + std::deque<xmlNodePtr> m_Queue; //!Queue for breadth-first search + + public: + LibXmlTreeWalker( xmlDocPtr doc ); + void nextNode(); + xmlNodePtr currentNode() { return m_pCurrentNode;} + bool end() const; + void ignoreCurrNodesChildren(); +}; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/HelpCompiler.hxx b/helpcompiler/inc/HelpCompiler.hxx new file mode 100644 index 0000000000..985d207529 --- /dev/null +++ b/helpcompiler/inc/HelpCompiler.hxx @@ -0,0 +1,213 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership.
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#pragma once + +#include <sal/config.h> + +#include <deque> +#include <memory> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <libxml/parser.h> + +#include <rtl/ustring.hxx> +#include <rtl/character.hxx> +#include <osl/process.h> +#include <osl/file.hxx> +#include <osl/thread.h> +#include <o3tl/char16_t2wchar_t.hxx> + +#include <helpcompiler/compilehelp.hxx> + +#if OSL_DEBUG_LEVEL > 2 + #include <iostream> + #define HCDBG(foo) do { if (true) foo; } while(false) +#else + #define HCDBG(foo) do { } while(false) +#endif + +namespace fs +{ + enum convert { native }; + class path + { + public: + OUString data; + public: + path() {} + path(const std::string &in, convert) + { + OUString sWorkingDir; + osl_getProcessWorkingDir(&sWorkingDir.pData); + OUString ustrSystemPath(OStringToOUString(in, FileNameEnc())); + osl::File::getFileURLFromSystemPath(ustrSystemPath, data); + (void)osl::File::getAbsoluteFileURL(sWorkingDir, data, data); + } + path(const std::string &FileURL) + { + data = OStringToOUString(FileURL, FileNameEnc()); + } + std::string native_file_string() const + { + OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(data, ustrSystemPath); + OString tmp(OUStringToOString(ustrSystemPath, FileNameEnc())); + HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl); + return std::string(tmp); + } +#ifdef _WIN32 + std::wstring native_file_string_w() const + { + OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(data, ustrSystemPath); + return std::wstring(o3tl::toW(ustrSystemPath.getStr())); + } +#endif + std::string toUTF8() const + { + OString tmp(OUStringToOString(data, RTL_TEXTENCODING_UTF8)); + return std::string(tmp); + } 
+ bool empty() const { return data.isEmpty(); } + path operator/(const std::string &in) const + { + path ret(*this); + HCDBG(std::cerr << "orig was " << + OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); + OUString ustrSystemPath(OStringToOUString(in, FileNameEnc())); + ret.data += "/" + ustrSystemPath; + HCDBG(std::cerr << "final is " << + OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl); + return ret; + } + void append(const char *in) + { + OUString ustrSystemPath(OStringToOUString(in, FileNameEnc())); + data += ustrSystemPath; + } + void append(const std::string &in) { append(in.c_str()); } + + private: +#ifdef _WIN32 + // On Windows, libxslt and libxml use UTF-8 path strings + static constexpr rtl_TextEncoding FileNameEnc() { return RTL_TEXTENCODING_UTF8; } +#else + static rtl_TextEncoding FileNameEnc() { return osl_getThreadTextEncoding(); } +#endif + + }; + + void create_directory(const fs::path& indexDirName); + void copy(const fs::path &src, const fs::path &dest); +} + + +typedef std::unordered_map<std::string, std::string> Stringtable; +typedef std::deque<std::string> LinkedList; + +typedef std::unordered_map<std::string, LinkedList> Hashtable; + +class StreamTable +{ +public: + std::string document_path; + std::string document_module; + std::string document_title; + + std::unique_ptr< std::vector<std::string> > appl_hidlist; + std::unique_ptr<Hashtable> appl_keywords; + std::unique_ptr<Stringtable> appl_helptexts; + xmlDocPtr appl_doc; + + StreamTable() : + appl_doc(nullptr) + {} + void dropappl() /* releases the per-application data; safe to call more than once */ + { + appl_hidlist.reset(); + appl_keywords.reset(); + appl_helptexts.reset(); + if (appl_doc) { xmlFreeDoc(appl_doc); appl_doc = nullptr; } /* reset the pointer: the destructor calls dropappl() again and must not free the document twice */ + } + ~StreamTable() + { + dropappl(); + } +}; + +struct HelpProcessingException +{ + HelpProcessingErrorClass m_eErrorClass; + std::string m_aErrorMsg; + std::string m_aXMLParsingFile; + int m_nXMLParsingLine; + + HelpProcessingException( HelpProcessingErrorClass eErrorClass, std::string
aErrorMsg ) + : m_eErrorClass( eErrorClass ) + , m_aErrorMsg(std::move( aErrorMsg )) + , m_nXMLParsingLine( 0 ) + {} + HelpProcessingException( std::string aErrorMsg, std::string aXMLParsingFile, int nXMLParsingLine ) + : m_eErrorClass( HelpProcessingErrorClass::XmlParsing ) + , m_aErrorMsg(std::move( aErrorMsg )) + , m_aXMLParsingFile(std::move( aXMLParsingFile )) + , m_nXMLParsingLine( nXMLParsingLine ) + {} +}; + +class HelpCompiler +{ +public: + HelpCompiler(StreamTable &streamTable, + fs::path in_inputFile, + fs::path in_src, + fs::path in_zipdir, + fs::path in_resCompactStylesheet, + fs::path in_resEmbStylesheet, + std::string in_module, + std::string in_lang, + bool in_bExtensionMode); + /// @throws HelpProcessingException + /// @throws BasicCodeTagger::TaggerException + void compile(); +private: + xmlDocPtr getSourceDocument(const fs::path &filePath); + static void tagBasicCodeExamples(xmlDocPtr doc); + xmlDocPtr compactXhpForJar(xmlDocPtr doc); + void saveXhpForJar(xmlDocPtr doc, const fs::path &filePath); + xmlNodePtr clone(xmlNodePtr node, const std::string& appl); + StreamTable &streamTable; + const fs::path inputFile, src, zipdir; + const std::string module, lang; + const fs::path resCompactStylesheet; + const fs::path resEmbStylesheet; + bool bExtensionMode; + std::string gui; +}; + +inline char tocharlower(char c) +{ + return static_cast<char>( + rtl::toAsciiLowerCase(static_cast<unsigned char>(c))); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/inc/HelpLinker.hxx b/helpcompiler/inc/HelpLinker.hxx new file mode 100644 index 0000000000..9645f7c4c6 --- /dev/null +++ b/helpcompiler/inc/HelpLinker.hxx @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#pragma once + +#include <memory> +#include <vector> +#include <helpcompiler/dllapi.h> +#include "HelpCompiler.hxx" +#include <libxslt/xsltInternals.h> + +class L10N_DLLPUBLIC IndexerPreProcessor +{ +private: + fs::path m_fsCaptionFilesDirName; + fs::path m_fsContentFilesDirName; + + xsltStylesheetPtr m_xsltStylesheetPtrCaption; + xsltStylesheetPtr m_xsltStylesheetPtrContent; + +public: + IndexerPreProcessor( const fs::path& fsIndexBaseDir, + const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet ); + ~IndexerPreProcessor(); + + void processDocument( xmlDocPtr doc, const std::string& EncodedDocPath ); +}; + +class HelpLinker +{ +public: + /// @throws HelpProcessingException + L10N_DLLPUBLIC void main(std::vector<std::string> &args, + std::string const * pExtensionPath = nullptr, + std::string const * pDestination = nullptr, + const OUString* pOfficeHelpPath = nullptr ); + + HelpLinker() + : bExtensionMode(false) + , m_bUseLangRoot(true) + , m_bCreateIndex(true) + {} + +private: + Stringtable additionalFiles; + std::vector<std::string> helpFiles; + fs::path sourceRoot; + fs::path compactStylesheet; + fs::path embeddStylesheet; + fs::path idxCaptionStylesheet; + fs::path idxContentStylesheet; + fs::path zipdir; + fs::path outputFile; + std::string extsource; + std::string extdestination; + 
std::string module; + std::string lang; + std::string extensionPath; + std::string extensionDestination; + bool bExtensionMode; + fs::path indexDirParentName; + std::unique_ptr<IndexerPreProcessor> m_pIndexerPreProcessor; + bool m_bUseLangRoot; + bool m_bCreateIndex; + void initIndexerPreProcessor(); + /// @throws HelpProcessingException + /// @throws BasicCodeTagger::TaggerException + void link(); + static void addBookmark( FILE* pFile_DBHelp, std::string thishid, + const std::string& fileB, const std::string& anchorB, + const std::string& jarfileB, const std::string& titleB ); +}; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/BasCodeTagger.cxx b/helpcompiler/source/BasCodeTagger.cxx new file mode 100644 index 0000000000..3511617f4d --- /dev/null +++ b/helpcompiler/source/BasCodeTagger.cxx @@ -0,0 +1,230 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <BasCodeTagger.hxx> +#include <rtl/ustring.hxx> +#include <iostream> + +LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc ) +{ + if ( doc == nullptr ) + throw BasicCodeTagger::NULL_DOCUMENT; + m_pCurrentNode = xmlDocGetRootElement( doc ); + if ( m_pCurrentNode == nullptr ) + throw BasicCodeTagger::EMPTY_DOCUMENT; + else if ( m_pCurrentNode->xmlChildrenNode != nullptr ) + m_Queue.push_back( m_pCurrentNode->xmlChildrenNode ); + nextNode(); +} + +void LibXmlTreeWalker::nextNode() +{ + if ( end() ) return; /* no sibling and nothing queued (e.g. a childless root element): stay on the current node instead of calling front() on an empty deque */ + //next node + if ( m_pCurrentNode->next == nullptr ) + { + m_pCurrentNode = m_Queue.front(); + m_Queue.pop_front(); + } + else + m_pCurrentNode = m_pCurrentNode->next; + //queue children if they exist + if ( m_pCurrentNode->xmlChildrenNode != nullptr ) + m_Queue.push_back( m_pCurrentNode->xmlChildrenNode ); +} + +void LibXmlTreeWalker::ignoreCurrNodesChildren() +{ + if ( m_pCurrentNode->xmlChildrenNode != nullptr ) + m_Queue.pop_back(); +} + +bool LibXmlTreeWalker::end() const +{ + return m_pCurrentNode->next == nullptr && m_Queue.empty(); +} + + +BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc ): + m_Highlighter(HighlighterLanguage::Basic) +{ + if ( rootDoc == nullptr ) + throw NULL_DOCUMENT; + m_pDocument = rootDoc; + m_pXmlTreeWalker = nullptr; + m_bTaggingCompleted = false; +} + +BasicCodeTagger::~BasicCodeTagger() +{ +} +//!Gathers all the <bascode> tag nodes from xml tree. +/*! + * Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
+ */ +void BasicCodeTagger::getBasicCodeContainerNodes() +{ + xmlNodePtr currentNode; + + m_BasicCodeContainerTags.clear(); + + m_pXmlTreeWalker.reset(new LibXmlTreeWalker( m_pDocument )); + + currentNode = m_pXmlTreeWalker->currentNode(); + if ( !( xmlStrcmp( currentNode->name, reinterpret_cast<const xmlChar*>("bascode") ) ) ) + { //Found <bascode> + m_BasicCodeContainerTags.push_back( currentNode ); //it goes to the end of the list + } + while ( !m_pXmlTreeWalker->end() ) + { + m_pXmlTreeWalker->nextNode(); + if ( !( xmlStrcmp( m_pXmlTreeWalker->currentNode()->name, reinterpret_cast<const xmlChar*>("bascode") ) ) ) + { //Found <bascode> + m_BasicCodeContainerTags.push_back( m_pXmlTreeWalker->currentNode() ); //it goes to the end of the list + m_pXmlTreeWalker->ignoreCurrNodesChildren(); + } + } +} + +//! Extracts Basic Codes contained in <bascode> tags. +/*! + * For each <bascode> this method iterates through its <paragraph> tags and "inserts" <item> tags according + * to the Basic code syntax found in that paragraph. + */ +void BasicCodeTagger::tagBasCodeParagraphs() +{ + //helper variables + xmlNodePtr currParagraph; + for (auto const& currBascodeNode : m_BasicCodeContainerTags) + { + currParagraph = currBascodeNode->xmlChildrenNode; //first <paragraph> + while ( currParagraph != nullptr ) + { + tagParagraph( currParagraph ); + currParagraph=currParagraph->next; + } + } + m_BasicCodeContainerTags.clear(); +} + +//! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code. +void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph ) +{ + //1. get paragraph text + xmlChar* codeSnippet; + codeSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 ); + if ( codeSnippet == nullptr ) + { + return; //no text, nothing more to do here + } + //2.
delete every child from paragraph (except attributes) + xmlNodePtr curNode = paragraph->xmlChildrenNode; + xmlNodePtr sibling; + while ( curNode != nullptr ) + { + sibling = curNode->next; + xmlUnlinkNode( curNode ); + xmlFreeNode( curNode ); + curNode = sibling; + } + + //3. create new paragraph content + OUString strLine( reinterpret_cast<const char*>(codeSnippet), + strlen(reinterpret_cast<const char*>(codeSnippet)), + RTL_TEXTENCODING_UTF8 ); + std::vector<HighlightPortion> portions; + m_Highlighter.getHighlightPortions( strLine, portions ); + for (auto const& portion : portions) + { + OString sToken(OUStringToOString(strLine.subView(portion.nBegin, portion.nEnd-portion.nBegin), RTL_TEXTENCODING_UTF8)); + xmlNodePtr text = xmlNewText(reinterpret_cast<const xmlChar*>(sToken.getStr())); + if ( portion.tokenType != TokenType::Whitespace ) + { + xmlChar* typeStr = getTypeString( portion.tokenType ); + curNode = xmlNewTextChild( paragraph, nullptr, reinterpret_cast<xmlChar const *>("item"), nullptr ); + xmlNewProp( curNode, reinterpret_cast<xmlChar const *>("type"), typeStr ); + xmlAddChild( curNode, text ); + xmlFree( typeStr ); + } + else + xmlAddChild( paragraph, text ); + } + xmlFree( codeSnippet ); +} + +//! Manages tagging process. +/*! + * This is the "main" function of BasicCodeTagger. + */ +void BasicCodeTagger::tagBasicCodes() +{ + if ( m_bTaggingCompleted ) + return; + //gather <bascode> nodes + try + { + getBasicCodeContainerNodes(); + } + catch (TaggerException &ex) + { + std::cout << "BasCodeTagger error occurred. Error code:" << ex << std::endl; + } + + //tag basic code paragraphs in <bascode> tag + tagBasCodeParagraphs(); + m_bTaggingCompleted = true; +} + +//! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... 
> +xmlChar* BasicCodeTagger::getTypeString( TokenType tokenType ) +{ + const char* str; + switch ( tokenType ) + { + case TokenType::Unknown : + str = "unknown"; + break; + case TokenType::Identifier : + str = "identifier"; + break; + case TokenType::Whitespace : + str = "whitespace"; + break; + case TokenType::Number : + str = "number"; + break; + case TokenType::String : + str = "string"; + break; + case TokenType::EOL : + str = "eol"; + break; + case TokenType::Comment : + str = "comment"; + break; + case TokenType::Error : + str = "error"; + break; + case TokenType::Operator : + str = "operator"; + break; + case TokenType::Keywords : + str = "keyword"; + break; + case TokenType::Parameter : + str = "parameter"; + break; + default : + str = "unknown"; + break; + } + return xmlCharStrdup( str ); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpCompiler.cxx b/helpcompiler/source/HelpCompiler.cxx new file mode 100644 index 0000000000..120fb702f0 --- /dev/null +++ b/helpcompiler/source/HelpCompiler.cxx @@ -0,0 +1,508 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +#include <algorithm> +#include <memory> +#include <HelpCompiler.hxx> +#include <BasCodeTagger.hxx> +#include <iostream> +#include <stdlib.h> +#include <string.h> +#include <libxslt/xsltInternals.h> +#include <libxslt/transform.h> +#include <rtl/character.hxx> +#include <sal/log.hxx> +#include <utility> + +HelpCompiler::HelpCompiler(StreamTable &in_streamTable, fs::path in_inputFile, + fs::path in_src, fs::path in_zipdir, fs::path in_resCompactStylesheet, + fs::path in_resEmbStylesheet, std::string in_module, std::string in_lang, + bool in_bExtensionMode) + : streamTable(in_streamTable), inputFile(std::move(in_inputFile)), + src(std::move(in_src)), zipdir(std::move(in_zipdir)), module(std::move(in_module)), lang(std::move(in_lang)), resCompactStylesheet(std::move(in_resCompactStylesheet)), + resEmbStylesheet(std::move(in_resEmbStylesheet)), bExtensionMode( in_bExtensionMode ) +{ + xmlKeepBlanksDefaultValue = 0; + char* os = getenv("OS"); + if (os) + { + gui = (strcmp(os, "WNT") == 0 ? "WIN" : (strcmp(os, "MACOSX") == 0 ? 
"MAC" : "UNIX")); + } +} + +void HelpCompiler::tagBasicCodeExamples( xmlDocPtr doc ) +{ + try + { + BasicCodeTagger bct( doc ); + bct.tagBasicCodes(); + } + catch ( BasicCodeTagger::TaggerException &ex ) + { + if ( ex != BasicCodeTagger::EMPTY_DOCUMENT ) + throw; + } +} + +xmlDocPtr HelpCompiler::compactXhpForJar( xmlDocPtr doc ) +{ + static xsltStylesheetPtr compact = nullptr; + static const char *params[2 + 1]; + params[0] = nullptr; + xmlDocPtr compacted; + + if (!compact) + { + compact = xsltParseStylesheetFile(BAD_CAST(resCompactStylesheet.native_file_string().c_str())); + } + + compacted = xsltApplyStylesheet(compact, doc, params); + return compacted; +} + +void HelpCompiler::saveXhpForJar( xmlDocPtr doc, const fs::path &filePath ) +{ + //save processed xhp document in ziptmp<module>_<lang>/text directory +#ifdef _WIN32 + std::string pathSep = "\\"; +#else + std::string pathSep = "/"; +#endif + const std::string& sourceXhpPath = filePath.native_file_string(); + std::string zipdirPath = zipdir.native_file_string(); + const std::string srcdirPath( src.native_file_string() ); + // srcdirPath contains trailing /, but we want the file path with / at the beginning + std::string jarXhpPath = sourceXhpPath.substr( srcdirPath.length() - 1 ); + std::string xhpFileName = jarXhpPath.substr( jarXhpPath.rfind( pathSep ) + 1 ); + jarXhpPath = jarXhpPath.substr( 0, jarXhpPath.rfind( pathSep ) ); + if ( !jarXhpPath.compare( 1, 11, "text" + pathSep + "sbasic" ) ) + { + tagBasicCodeExamples( doc ); + } + if ( !jarXhpPath.compare( 1, 11, "text" + pathSep + "shared" ) ) + { + const size_t pos = zipdirPath.find( "ziptmp" ); + if ( pos != std::string::npos ) + zipdirPath.replace( pos + 6, module.length(), "shared" ); + } + xmlDocPtr compacted = compactXhpForJar( doc ); + fs::create_directory( fs::path( zipdirPath + jarXhpPath, fs::native ) ); + if ( -1 == xmlSaveFormatFileEnc( (zipdirPath + jarXhpPath + pathSep + xhpFileName).c_str(), compacted, "utf-8", 0 ) ) + std::cerr << "Error 
saving file to " << (zipdirPath + jarXhpPath + pathSep + xhpFileName).c_str() << std::endl; + xmlFreeDoc(compacted); +} + +xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath) +{ + xmlDocPtr res; + if (bExtensionMode) + { + // this is the mode when used within LibreOffice for importing help + // bundled with an extension + res = xmlParseFile(filePath.native_file_string().c_str()); + } + else + { + // this is the mode when used at build time to generate LibreOffice + // help from its xhp source + static xsltStylesheetPtr cur = nullptr; + static const char *params[2 + 1]; + if (!cur) + { + static std::string fsroot('\'' + src.toUTF8() + '\''); + + cur = xsltParseStylesheetFile(BAD_CAST(resEmbStylesheet.native_file_string().c_str())); + + int nbparams = 0; + params[nbparams++] = "fsroot"; + params[nbparams++] = fsroot.c_str(); + params[nbparams] = nullptr; + } + xmlDocPtr doc = xmlParseFile(filePath.native_file_string().c_str()); + + saveXhpForJar( doc, filePath ); + + res = xsltApplyStylesheet(cur, doc, params); + xmlFreeDoc(doc); + } + return res; +} + +// returns a node representing the whole stuff compiled for the current +// application. 
+xmlNodePtr HelpCompiler::clone(xmlNodePtr node, const std::string& appl) +{ + xmlNodePtr root = xmlCopyNode(node, 2); + if (node->xmlChildrenNode) + { + xmlNodePtr list = node->xmlChildrenNode; + while (list) + { + if (strcmp(reinterpret_cast<const char*>(list->name), "switchinline") == 0 || strcmp(reinterpret_cast<const char*>(list->name), "switch") == 0) + { + std::string tmp; + xmlChar * prop = xmlGetProp(list, reinterpret_cast<xmlChar const *>("select")); + if (prop != nullptr) + { + if (strcmp(reinterpret_cast<char *>(prop), "sys") == 0) + { + tmp = gui; + } + else if (strcmp(reinterpret_cast<char *>(prop), "appl") == 0) + { + tmp = appl; + } + xmlFree(prop); + } + if (!tmp.empty()) + { + bool isCase=false; + xmlNodePtr caseList=list->xmlChildrenNode; + while (caseList) + { + xmlChar* select = xmlGetProp(caseList, BAD_CAST("select")); + if (select) + { + if (!strcmp(reinterpret_cast<char*>(select), tmp.c_str()) && !isCase) + { + isCase=true; + xmlNodePtr clp = caseList->xmlChildrenNode; + while (clp) + { + xmlAddChild(root, clone(clp, appl)); + clp = clp->next; + } + } + xmlFree(select); + } + else + { + if ((strcmp(reinterpret_cast<const char*>(caseList->name), "defaultinline") != 0) && (strcmp(reinterpret_cast<const char*>(caseList->name), "default") != 0)) + { + xmlAddChild(root, clone(caseList, appl)); + } + else + { + if (!isCase) + { + xmlNodePtr clp = caseList->xmlChildrenNode; + while (clp) + { + xmlAddChild(root, clone(clp, appl)); + clp = clp->next; + } + } + } + } + caseList = caseList->next; + } + } + } + else + { + xmlAddChild(root, clone(list, appl)); + } + list = list->next; + } + } + return root; +} + +namespace { + +class myparser +{ +public: + std::string documentId; + std::string fileName; + std::string title; + std::unique_ptr< std::vector<std::string> > hidlist; + std::unique_ptr<Hashtable> keywords; + std::unique_ptr<Stringtable> helptexts; +private: + std::vector<std::string> extendedHelpText; +public: + myparser(std::string 
// Strips leading and trailing space characters (' ' only, no other
// whitespace) from str in place; a string consisting solely of spaces
// becomes empty.
static void trim(std::string& str)
{
    const std::string::size_type last = str.find_last_not_of(' ');
    if (last == std::string::npos)
    {
        // empty or nothing but spaces
        str.clear();
        return;
    }

    str.erase(last + 1);
    // at least one non-space character is left, so this never yields npos
    str.erase(0, str.find_first_not_of(' '));
}
std::string branch(reinterpret_cast<char*>(branchxml)); + xmlFree (branchxml); + xmlChar* idxml = xmlGetProp(test, BAD_CAST("id")); + if (idxml == nullptr) { + throw HelpProcessingException( + HelpProcessingErrorClass::XmlParsing, "bookmark lacks id attribute"); + } + std::string anchor(reinterpret_cast<char*>(idxml)); + xmlFree (idxml); + + if (branch.compare(0, 3, "hid") == 0) + { + size_t index = branch.find('/'); + if (index != std::string::npos) + { + auto hid = branch.substr(1 + index); + // one shall serve as a documentId + if (documentId.empty()) + documentId = hid; + extendedHelpText.push_back(hid); + HCDBG(std::cerr << "hid pushback" << (anchor.empty() ? hid : hid + "#" + anchor) << std::endl); + hidlist->push_back( anchor.empty() ? hid : hid + "#" + anchor); + } + else + continue; + } + else if (branch.compare("index") == 0) + { + LinkedList ll; + + for (xmlNodePtr nd = test->xmlChildrenNode; nd; nd = nd->next) + { + if (strcmp(reinterpret_cast<const char*>(nd->name), "bookmark_value")) + continue; + + std::string embedded; + xmlChar* embeddedxml = xmlGetProp(nd, BAD_CAST("embedded")); + if (embeddedxml) + { + embedded = std::string(reinterpret_cast<char*>(embeddedxml)); + xmlFree (embeddedxml); + std::transform (embedded.begin(), embedded.end(), + embedded.begin(), tocharlower); + } + + bool isEmbedded = !embedded.empty() && embedded.compare("true") == 0; + if (isEmbedded) + continue; + + std::string keyword = dump(nd); + size_t keywordSem = keyword.find(';'); + if (keywordSem != std::string::npos) + { + std::string tmppre = + keyword.substr(0,keywordSem); + trim(tmppre); + std::string tmppos = + keyword.substr(1+keywordSem); + trim(tmppos); + keyword = tmppre + ";" + tmppos; + } + ll.push_back(keyword); + } + if (!ll.empty()) + (*keywords)[anchor] = ll; + } + else if (branch.compare("contents") == 0) + { + // currently not used + } + } + else if (!strcmp(reinterpret_cast<const char*>(test->name), "ahelp")) + { + //tool-tip + std::string text = 
dump(test); + std::replace(text.begin(), text.end(), '\n', ' '); + trim(text); + + //tool-tip target + std::string hidstr("."); //. == previous seen hid bookmarks + xmlChar* hid = xmlGetProp(test, BAD_CAST("hid")); + if (hid) + { + hidstr = std::string(reinterpret_cast<char*>(hid)); + xmlFree (hid); + } + + if (hidstr != "." && !hidstr.empty()) //simple case of explicitly named target + { + assert(!hidstr.empty()); + (*helptexts)[hidstr] = text; + } + else //apply to list of "current" hids determined by recent bookmarks that have hid in their branch + { + //TODO: make these asserts and flush out all our broken help ids + SAL_WARN_IF(hidstr.empty(), "helpcompiler", "hid='' for text:" << text); + SAL_WARN_IF(!hidstr.empty() && extendedHelpText.empty(), "helpcompiler", "hid='.' with no hid bookmark branches in file: " << fileName + " for text: " << text); + for (const std::string& name : extendedHelpText) + { + (*helptexts)[name] = text; + } + } + extendedHelpText.clear(); + } + // traverse children + traverse(test); + } +} + +void HelpCompiler::compile() +{ + // we now have the jaroutputstream, which will contain the document. 
+ // now determine the document as a dom tree in variable docResolved + + xmlDocPtr docResolvedOrg = getSourceDocument(inputFile); + + // now add path to the document + // resolve the dom + + if (!docResolvedOrg) + { + std::stringstream aStrStream; + aStrStream << "ERROR: file not existing: " << inputFile.native_file_string().c_str() << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + std::string documentId; + std::string fileName; + std::string title; + // returns a clone of the document with switch-cases resolved + std::string appl = module.substr(1); + for (char & i : appl) + { + i=rtl::toAsciiUpperCase(static_cast<unsigned char>(i)); + } + xmlNodePtr docResolved = clone(xmlDocGetRootElement(docResolvedOrg), appl); + myparser aparser(documentId, fileName, title); + aparser.traverse(docResolved); + documentId = aparser.documentId; + fileName = aparser.fileName; + title = aparser.title; + + HCDBG(std::cerr << documentId << " : " << fileName << " : " << title << std::endl); + + xmlDocPtr docResolvedDoc = xmlCopyDoc(docResolvedOrg, false); + xmlDocSetRootElement(docResolvedDoc, docResolved); + + streamTable.dropappl(); + streamTable.appl_doc = docResolvedDoc; + streamTable.appl_hidlist = std::move(aparser.hidlist); + streamTable.appl_helptexts = std::move(aparser.helptexts); + streamTable.appl_keywords = std::move(aparser.keywords); + + streamTable.document_path = fileName; + streamTable.document_title = title; + std::string actMod = module; + + if ( !bExtensionMode && !fileName.empty()) + { + if (fileName.compare(0, 6, "/text/") == 0) + { + actMod = fileName.substr(strlen("/text/")); + actMod = actMod.substr(0, actMod.find('/')); + } + } + streamTable.document_module = actMod; + xmlFreeDoc(docResolvedOrg); +} + +namespace fs +{ + void create_directory(const fs::path& indexDirName) + { + HCDBG( + std::cerr << "creating " << + OUStringToOString(indexDirName.data, RTL_TEXTENCODING_UTF8).getStr() + << std::endl 
+ ); + osl::Directory::createPath(indexDirName.data); + } + + void copy(const fs::path &src, const fs::path &dest) + { + osl::File::copy(src.data, dest.data); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpIndexer.cxx b/helpcompiler/source/HelpIndexer.cxx new file mode 100644 index 0000000000..65e46743b4 --- /dev/null +++ b/helpcompiler/source/HelpIndexer.cxx @@ -0,0 +1,210 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <helpcompiler/HelpIndexer.hxx> + +#include <rtl/string.hxx> +#include <rtl/uri.hxx> +#include <o3tl/runtimetooustring.hxx> +#include <osl/file.hxx> +#include <osl/thread.h> +#include <o3tl/string_view.hxx> +#include <memory> +#include <utility> + +#include "LuceneHelper.hxx" +#include <CLucene.h> +#include <CLucene/analysis/LanguageBasedAnalyzer.h> + +#if defined _WIN32 +#include <o3tl/char16_t2wchar_t.hxx> +#include <prewin.h> +#include <postwin.h> +#endif + +using namespace lucene::document; + +HelpIndexer::HelpIndexer(OUString lang, OUString module, + std::u16string_view srcDir, std::u16string_view outDir) + : d_lang(std::move(lang)), d_module(std::move(module)) +{ + d_indexDir = outDir + OUStringChar('/') + d_module + ".idxl"; + osl_getAbsoluteFileURL(nullptr, d_indexDir.pData, &d_indexDir.pData); + d_captionDir = OUString::Concat(srcDir) + "/caption"; + osl_getAbsoluteFileURL(nullptr, d_captionDir.pData, &d_captionDir.pData); + d_contentDir = OUString::Concat(srcDir) + "/content"; + osl_getAbsoluteFileURL(nullptr, d_contentDir.pData, &d_contentDir.pData); +} + +#if defined _WIN32 +namespace +{ +template <class Constructor> +auto TryWithUnicodePathWorkaround(const OUString& ustrPath, 
const Constructor& constructor) +{ + const rtl_TextEncoding eThreadEncoding = osl_getThreadTextEncoding(); + OString sPath = OUStringToOString(ustrPath, eThreadEncoding); + try + { + // First try path in thread encoding (ACP in case of Windows). + return constructor(sPath); + } + catch (const CLuceneError&) + { + // Maybe the path contains characters not representable in ACP. There's no API in lucene + // that takes Unicode strings (they take 8-bit strings, and pass them to CRT library + // functions without conversion). + + // For a workaround, try short name, which should only contain ASCII characters. Would + // not help (i.e., would return original long name) if short (8.3) file name creation is + // disabled in OS or volume settings. + wchar_t buf[32767]; + if (GetShortPathNameW(o3tl::toW(ustrPath.getStr()), buf, std::size(buf)) == 0) + throw; + sPath = OUStringToOString(o3tl::toU(buf), eThreadEncoding); + return constructor(sPath); + } +} +} +#endif + +bool HelpIndexer::indexDocuments() +{ + if (!scanForFiles()) + return false; + + try + { + std::u16string_view sLang = o3tl::getToken(d_lang, 0, '-'); + bool bUseCJK = sLang == u"ja" || sLang == u"ko" || sLang == u"zh"; + + // Construct the analyzer appropriate for the given language + std::unique_ptr<lucene::analysis::Analyzer> analyzer; + if (bUseCJK) + analyzer.reset(new lucene::analysis::LanguageBasedAnalyzer(L"cjk")); + else + analyzer.reset(new lucene::analysis::standard::StandardAnalyzer()); + + OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(d_indexDir, ustrSystemPath); + +#if defined _WIN32 + // Make sure the path exists, or GetShortPathNameW (if attempted) will fail. 
+ osl::Directory::createPath(d_indexDir); + auto writer = TryWithUnicodePathWorkaround(ustrSystemPath, [&analyzer](const OString& s) { + return std::make_unique<lucene::index::IndexWriter>(s.getStr(), analyzer.get(), true); + }); +#else + OString indexDirStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding()); + auto writer = std::make_unique<lucene::index::IndexWriter>(indexDirStr.getStr(), + analyzer.get(), true); +#endif + +#ifndef SYSTEM_CLUCENE + // avoid random values in index file, making help indices reproducible + writer->setSegmentInfoStartVersion(0); +#endif + + //Double limit of tokens allowed, otherwise we'll get a too-many-tokens + //exception for ja help. Could alternative ignore the exception and get + //truncated results as per java-Lucene apparently + writer->setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2); + + // Index the identified help files + Document doc; + for (auto const& elem : d_files) + { + helpDocument(elem, &doc); + writer->addDocument(&doc); + doc.clear(); + } + + // Optimize the index + writer->optimize(); + } + catch (CLuceneError &e) + { + d_error = o3tl::runtimeToOUString(e.what()); + return false; + } + + return true; +} + + +bool HelpIndexer::scanForFiles() { + if (!scanForFiles(d_contentDir)) { + return false; + } + if (!scanForFiles(d_captionDir)) { + return false; + } + return true; +} + +bool HelpIndexer::scanForFiles(OUString const & path) { + + osl::Directory dir(path); + if (osl::FileBase::E_None != dir.open()) { + d_error = "Error reading directory " + path; + return false; + } + + osl::DirectoryItem item; + osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type); + while (dir.getNextItem(item) == osl::FileBase::E_None) { + item.getFileStatus(fileStatus); + if (fileStatus.getFileType() == osl::FileStatus::Regular) { + d_files.insert(fileStatus.getFileName()); + } + } + + return true; +} + +void HelpIndexer::helpDocument(OUString const & fileName, 
Document *doc) const { + // Add the help path as an indexed, untokenized field. + + OUString path = "#HLP#" + d_module + "/" + fileName; + std::vector<TCHAR> aPath(OUStringToTCHARVec(path)); + doc->add(*_CLNEW Field(_T("path"), aPath.data(), int(Field::STORE_YES) | int(Field::INDEX_UNTOKENIZED))); + + OUString sEscapedFileName = + rtl::Uri::encode(fileName, + rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8); + + // Add the caption as a field. + OUString captionPath = d_captionDir + "/" + sEscapedFileName; + doc->add(*_CLNEW Field(_T("caption"), helpFileReader(captionPath), int(Field::STORE_NO) | int(Field::INDEX_TOKENIZED))); + + // Add the content as a field. + OUString contentPath = d_contentDir + "/" + sEscapedFileName; + doc->add(*_CLNEW Field(_T("content"), helpFileReader(contentPath), int(Field::STORE_NO) | int(Field::INDEX_TOKENIZED))); +} + +lucene::util::Reader *HelpIndexer::helpFileReader(OUString const & path) { + osl::File file(path); + if (osl::FileBase::E_None == file.open(osl_File_OpenFlag_Read)) { + file.close(); + OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(path, ustrSystemPath); +#if defined _WIN32 + return TryWithUnicodePathWorkaround(ustrSystemPath, [](const OString& s) { + return _CLNEW lucene::util::FileReader(s.getStr(), "UTF-8"); + }); +#else + OString pathStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding()); + return _CLNEW lucene::util::FileReader(pathStr.getStr(), "UTF-8"); +#endif + } else { + return _CLNEW lucene::util::StringReader(L""); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpIndexer_main.cxx b/helpcompiler/source/HelpIndexer_main.cxx new file mode 100644 index 0000000000..3ad47bbf7a --- /dev/null +++ b/helpcompiler/source/HelpIndexer_main.cxx @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <helpcompiler/HelpIndexer.hxx> +#include <osl/file.hxx> +#include <osl/process.h> +#include <osl/thread.h> +#include <string> +#include <iostream> + +int main(int argc, char **argv) +{ + try + { + const std::string aLang("-lang"); + const std::string aModule("-mod"); + const std::string aDir("-dir"); + + std::string lang; + std::string module; + std::string dir; + + bool error = false; + for (int i = 1; i < argc; ++i) { + if (aLang.compare(argv[i]) == 0) { + if (i + 1 < argc) { + lang = argv[++i]; + } else { + error = true; + } + } else if (aModule.compare(argv[i]) == 0) { + if (i + 1 < argc) { + module = argv[++i]; + } else { + error = true; + } + } else if (aDir.compare(argv[i]) == 0) { + if (i + 1 < argc) { + dir = argv[++i]; + } else { + error = true; + } + } else { + error = true; + } + } + + if (error) { + std::cerr << "Error parsing command-line arguments" << std::endl; + } + + if (error || lang.empty() || module.empty() || dir.empty()) { + std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -dir Dir" << std::endl; + return 1; + } + + OUString sDir; + + osl::File::getFileURLFromSystemPath( + OUString(dir.c_str(), dir.size(), osl_getThreadTextEncoding()), + sDir); + + OUString cwd; + osl_getProcessWorkingDir(&cwd.pData); + + (void)osl::File::getAbsoluteFileURL(cwd, sDir, sDir); + + HelpIndexer indexer( + OUString(lang.c_str(), lang.size(), osl_getThreadTextEncoding()), + OUString(module.c_str(), module.size(), osl_getThreadTextEncoding()), + sDir, sDir); + + if (!indexer.indexDocuments()) { + std::cerr << indexer.getErrorMessage() << std::endl; + return 2; + } + return 0; + } + catch (std::exception& e) + { + std::cerr << "failure: " << e.what() << std::endl; + return 2; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 
expandtab: */ diff --git a/helpcompiler/source/HelpLinker.cxx b/helpcompiler/source/HelpLinker.cxx new file mode 100644 index 0000000000..b99061d0dd --- /dev/null +++ b/helpcompiler/source/HelpLinker.cxx @@ -0,0 +1,942 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <HelpCompiler.hxx> +#include <HelpLinker.hxx> + +#include <algorithm> +#include <fstream> + +#include <string.h> + +#include <libxslt/transform.h> + +#include <sal/types.h> +#include <o3tl/char16_t2wchar_t.hxx> +#include <sal/log.hxx> + +#include <expat.h> +#include <memory> + +namespace { +FILE* fopen_impl(const fs::path& rPath, const char* szMode) +{ +#ifdef _WIN32 //We need _wfopen to support long file paths on Windows XP + return _wfopen(rPath.native_file_string_w().c_str(), o3tl::toW(OUString::createFromAscii(szMode).getStr())); +#else + return fopen(rPath.native_file_string().c_str(), szMode); +#endif +} +} + +IndexerPreProcessor::IndexerPreProcessor + ( const fs::path& fsIndexBaseDir, + const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet ) +{ + m_fsCaptionFilesDirName = fsIndexBaseDir / "caption"; + fs::create_directory( m_fsCaptionFilesDirName ); + + m_fsContentFilesDirName = fsIndexBaseDir / "content"; + fs::create_directory( m_fsContentFilesDirName ); + + m_xsltStylesheetPtrCaption = xsltParseStylesheetFile + (reinterpret_cast<const xmlChar *>(idxCaptionStylesheet.native_file_string().c_str())); + m_xsltStylesheetPtrContent = xsltParseStylesheetFile + (reinterpret_cast<const xmlChar *>(idxContentStylesheet.native_file_string().c_str())); +} + +IndexerPreProcessor::~IndexerPreProcessor() +{ + if( m_xsltStylesheetPtrCaption ) + xsltFreeStylesheet( m_xsltStylesheetPtrCaption ); + if( m_xsltStylesheetPtrContent ) + xsltFreeStylesheet( m_xsltStylesheetPtrContent ); +} + +static std::string getEncodedPath( const std::string& Path ) +{ + std::string_view aOStr_Path( Path ); + OUString aOUStr_Path( OStringToOUString + ( aOStr_Path, osl_getThreadTextEncoding() ) ); + OUString aPathURL; + osl::File::getFileURLFromSystemPath( aOUStr_Path, aPathURL ); + OString aOStr_PathURL( OUStringToOString + ( aPathURL, osl_getThreadTextEncoding() ) ); + std::string aStdStr_PathURL( aOStr_PathURL ); + return aStdStr_PathURL; +} + +void 
IndexerPreProcessor::processDocument + ( xmlDocPtr doc, const std::string &EncodedDocPath ) +{ + std::string aStdStr_EncodedDocPathURL = getEncodedPath( EncodedDocPath ); + + if( m_xsltStylesheetPtrCaption ) + { + xmlDocPtr resCaption = xsltApplyStylesheet( m_xsltStylesheetPtrCaption, doc, nullptr ); + xmlNodePtr pResNodeCaption = resCaption->xmlChildrenNode; + if( pResNodeCaption ) + { + fs::path fsCaptionPureTextFile_docURL = m_fsCaptionFilesDirName / aStdStr_EncodedDocPathURL; + FILE* pFile_docURL = fopen_impl( fsCaptionPureTextFile_docURL, "w" ); + if( pFile_docURL ) + { + fprintf( pFile_docURL, "%s\n", pResNodeCaption->content ); + fclose( pFile_docURL ); + } + } + xmlFreeDoc(resCaption); + } + + if( !m_xsltStylesheetPtrContent ) + return; + + xmlDocPtr resContent = xsltApplyStylesheet( m_xsltStylesheetPtrContent, doc, nullptr ); + xmlNodePtr pResNodeContent = resContent->xmlChildrenNode; + if( pResNodeContent ) + { + fs::path fsContentPureTextFile_docURL = m_fsContentFilesDirName / aStdStr_EncodedDocPathURL; + FILE* pFile_docURL = fopen_impl( fsContentPureTextFile_docURL, "w" ); + if( pFile_docURL ) + { + fprintf( pFile_docURL, "%s\n", pResNodeContent->content ); + fclose( pFile_docURL ); + } + } + xmlFreeDoc(resContent); +} + +namespace { + +struct Data +{ + std::vector<std::string> _idList; + + void append(const std::string &id) + { + _idList.push_back(id); + } + + std::string getString() const + { + std::string ret; + for (auto const& elem : _idList) + ret += elem + ";"; + return ret; + } +}; + +} + +static void writeKeyValue_DBHelp( FILE* pFile, const std::string& aKeyStr, const std::string& aValueStr ) +{ + if( pFile == nullptr ) + return; + char const cLF = 10; + unsigned int nKeyLen = aKeyStr.length(); + unsigned int nValueLen = aValueStr.length(); + fprintf( pFile, "%x ", nKeyLen ); + if( nKeyLen > 0 ) + { + if (fwrite( aKeyStr.c_str(), 1, nKeyLen, pFile ) != nKeyLen) + fprintf(stderr, "fwrite to db failed\n"); + } + if (fprintf( pFile, " %x ", 
nValueLen ) < 0) + fprintf(stderr, "fwrite to db failed\n"); + if( nValueLen > 0 ) + { + if (fwrite( aValueStr.c_str(), 1, nValueLen, pFile ) != nValueLen) + fprintf(stderr, "fwrite to db failed\n"); + } + if (fprintf( pFile, "%c", cLF ) < 0) + fprintf(stderr, "fwrite to db failed\n"); +} + +namespace { + +class HelpKeyword +{ +private: + typedef std::unordered_map<std::string, Data> DataHashtable; + DataHashtable _hash; + +public: + void insert(const std::string &key, const std::string &id) + { + Data &data = _hash[key]; + data.append(id); + } + + void dump_DBHelp( const fs::path& rFileName ) + { + FILE* pFile = fopen_impl( rFileName, "wb" ); + if( pFile == nullptr ) + return; + + for (auto const& elem : _hash) + writeKeyValue_DBHelp( pFile, elem.first, elem.second.getString() ); + + fclose( pFile ); + } +}; + +} + +namespace URLEncoder +{ + static std::string encode(const std::string &rIn) + { + const char * const good = "!$&'()*+,-.=@_"; + static const char hex[17] = "0123456789ABCDEF"; + + std::string result; + for (char c : rIn) + { + if (rtl::isAsciiAlphanumeric (static_cast<unsigned char>(c)) + || strchr (good, c)) + { + result += c; + } else { + result += '%'; + result += hex[static_cast<unsigned char>(c) >> 4]; + result += hex[c & 0xf]; + } + } + return result; + } +} + +void HelpLinker::addBookmark( FILE* pFile_DBHelp, std::string thishid, + const std::string& fileB, const std::string& anchorB, + const std::string& jarfileB, const std::string& titleB) +{ + HCDBG(std::cerr << "HelpLinker::addBookmark " << thishid << " " << + fileB << " " << anchorB << " " << jarfileB << " " << titleB << std::endl); + + thishid = URLEncoder::encode(thishid); + + int fileLen = fileB.length(); + if (!anchorB.empty()) + fileLen += (1 + anchorB.length()); + int dataLen = 1 + fileLen + 1 + jarfileB.length() + 1 + titleB.length(); + + std::vector<unsigned char> dataB(dataLen); + size_t i = 0; + dataB[i++] = static_cast<unsigned char>(fileLen); + for (char j : fileB) + dataB[i++] 
= static_cast<unsigned char>(j); + if (!anchorB.empty()) + { + dataB[i++] = '#'; + for (char j : anchorB) + dataB[i++] = j; + } + dataB[i++] = static_cast<unsigned char>(jarfileB.length()); + for (char j : jarfileB) + dataB[i++] = j; + + dataB[i++] = static_cast<unsigned char>(titleB.length()); + for (char j : titleB) + dataB[i++] = j; + + if( pFile_DBHelp != nullptr ) + { + std::string aValueStr( dataB.begin(), dataB.end() ); + writeKeyValue_DBHelp( pFile_DBHelp, thishid, aValueStr ); + } +} + +void HelpLinker::initIndexerPreProcessor() +{ + m_pIndexerPreProcessor.reset( new IndexerPreProcessor( indexDirParentName, + idxCaptionStylesheet, idxContentStylesheet ) ); +} + +void HelpLinker::link() +{ + + if( bExtensionMode ) + { + indexDirParentName = extensionDestination; + } + else + { + indexDirParentName = zipdir; + fs::create_directory(indexDirParentName); + } + + std::string mod = module; + std::transform (mod.begin(), mod.end(), mod.begin(), tocharlower); + + // do the work here + // continue with introduction of the overall process thing into the + // here all hzip files will be worked on + bool bUse_ = true; + if( !bExtensionMode ) + bUse_ = false; + + fs::path helpTextFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".ht_" : ".ht"))); + FILE* pFileHelpText_DBHelp = fopen_impl( helpTextFileName_DBHelp, "wb" ); + + fs::path dbBaseFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".db_" : ".db"))); + FILE* pFileDbBase_DBHelp = fopen_impl( dbBaseFileName_DBHelp, "wb" ); + + fs::path keyWordFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".key_" : ".key"))); + + HelpKeyword helpKeyword; + + // catch HelpProcessingException to avoid locking data bases + try + { + bool bIndexForExtension = true; + // lastly, initialize the indexBuilder + if ( (!bExtensionMode || bIndexForExtension) && !helpFiles.empty()) + initIndexerPreProcessor(); + + // here we start our loop over the hzip files. 
+ for (auto const& helpFile : helpFiles) + { + // process one file + // streamTable contains the streams in the hzip file + StreamTable streamTable; + const std::string &xhpFileName = helpFile; + + if (!bExtensionMode && xhpFileName.rfind(".xhp") != xhpFileName.length()-4) + { + // only work on .xhp - files + SAL_WARN("helpcompiler", + "ERROR: input list entry '" + << xhpFileName + << "' has the wrong extension (only files with extension .xhp are accepted)"); + + continue; + } + + fs::path langsourceRoot(sourceRoot); + fs::path xhpFile; + + if( bExtensionMode ) + { + // langsourceRoot == sourceRoot for extensions + std::string xhpFileNameComplete( extensionPath ); + xhpFileNameComplete.append( '/' + xhpFileName ); + xhpFile = fs::path( xhpFileNameComplete ); + } + else + { + langsourceRoot.append( "/" ); + if ( m_bUseLangRoot ) + langsourceRoot.append( lang + '/' ); + xhpFile = fs::path(xhpFileName, fs::native); + } + + HelpCompiler hc( streamTable, std::move(xhpFile), std::move(langsourceRoot), zipdir, + compactStylesheet, embeddStylesheet, module, lang, bExtensionMode ); + + HCDBG(std::cerr << "before compile of " << xhpFileName << std::endl); + hc.compile(); + HCDBG(std::cerr << "after compile of " << xhpFileName << std::endl); + + if (!m_bCreateIndex) + continue; + + std::string documentPath = streamTable.document_path; + if (documentPath.compare(0, 1, "/") == 0) + documentPath = documentPath.substr(1); + + std::string documentJarfile = streamTable.document_module + ".jar"; + + std::string documentTitle = streamTable.document_title; + if (documentTitle.empty()) + documentTitle = "<notitle>"; + + const std::string& fileB = documentPath; + const std::string& jarfileB = documentJarfile; + std::string& titleB = documentTitle; + + // add once this as its own id. 
+ addBookmark( pFileDbBase_DBHelp, documentPath, fileB, std::string(), jarfileB, titleB); + + const std::vector<std::string> *hidlist = streamTable.appl_hidlist.get(); + if (hidlist) + { + // now iterate over all elements of the hidlist + for (auto & elem : *hidlist) + { + std::string thishid = elem; + + std::string anchorB; + size_t index = thishid.rfind('#'); + if (index != std::string::npos) + { + anchorB = thishid.substr(1 + index); + thishid = thishid.substr(0, index); + } + addBookmark( pFileDbBase_DBHelp, thishid, fileB, anchorB, jarfileB, titleB); + } + } + + // now the keywords + const Hashtable *anchorToLL = streamTable.appl_keywords.get(); + if (anchorToLL && !anchorToLL->empty()) + { + std::string fakedHid = URLEncoder::encode(documentPath); + for (auto const& elemAnchor : *anchorToLL) + { + const std::string &anchor = elemAnchor.first; + addBookmark(pFileDbBase_DBHelp, documentPath, fileB, + anchor, jarfileB, titleB); + std::string totalId = fakedHid + "#" + anchor; + // std::cerr << hzipFileName << std::endl; + const LinkedList& ll = elemAnchor.second; + for (auto const& elem : ll) + { + helpKeyword.insert(elem, totalId); + } + } + + } + + // and last the helptexts + const Stringtable *helpTextHash = streamTable.appl_helptexts.get(); + if (helpTextHash) + { + for (auto const& elem : *helpTextHash) + { + std::string helpTextId = elem.first; + const std::string& helpTextText = elem.second; + + helpTextId = URLEncoder::encode(helpTextId); + + if( pFileHelpText_DBHelp != nullptr ) + writeKeyValue_DBHelp( pFileHelpText_DBHelp, helpTextId, helpTextText ); + } + } + + //IndexerPreProcessor + if( !bExtensionMode || bIndexForExtension ) + { + // now the indexing + xmlDocPtr document = streamTable.appl_doc; + if (document) + { + std::string temp = module; + std::transform (temp.begin(), temp.end(), temp.begin(), tocharlower); + m_pIndexerPreProcessor->processDocument(document, URLEncoder::encode(documentPath) ); + } + } + + } + + } + catch( const 
HelpProcessingException& ) + { + // catch HelpProcessingException to avoid locking data bases + if( pFileHelpText_DBHelp != nullptr ) + fclose( pFileHelpText_DBHelp ); + if( pFileDbBase_DBHelp != nullptr ) + fclose( pFileDbBase_DBHelp ); + throw; + } + + if( pFileHelpText_DBHelp != nullptr ) + fclose( pFileHelpText_DBHelp ); + if( pFileDbBase_DBHelp != nullptr ) + fclose( pFileDbBase_DBHelp ); + + helpKeyword.dump_DBHelp( keyWordFileName_DBHelp); + + if( bExtensionMode ) + return; + + // New index + for (auto const& additionalFile : additionalFiles) + { + const std::string &additionalFileName = additionalFile.second; + const std::string &additionalFileKey = additionalFile.first; + + fs::path fsAdditionalFileName( additionalFileName, fs::native ); + HCDBG({ + std::string aNativeStr = fsAdditionalFileName.native_file_string(); + const char* pStr = aNativeStr.c_str(); + std::cerr << pStr << std::endl; + }); + + fs::path fsTargetName( indexDirParentName / additionalFileKey ); + + fs::copy( fsAdditionalFileName, fsTargetName ); + } +} + + +void HelpLinker::main( std::vector<std::string> &args, + std::string const * pExtensionPath, std::string const * pDestination, + const OUString* pOfficeHelpPath ) +{ + bExtensionMode = false; + helpFiles.clear(); + + if ((!args.empty()) && args[0][0] == '@') + { + std::vector<std::string> stringList; + std::ifstream fileReader(args[0].substr(1).c_str()); + + while (fileReader) + { + std::string token; + fileReader >> token; + if (!token.empty()) + stringList.push_back(token); + } + fileReader.close(); + + args = stringList; + } + + size_t i = 0; + bool bSrcOption = false; + while (i < args.size()) + { + if (args[i].compare("-extlangsrc") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "extension source missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + extsource = args[i]; + } + else if (args[i].compare("-extlangdest") == 0) 
+ { + //If this argument is not provided then the location provided in -extsource will + //also be the destination + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "extension destination missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + extdestination = args[i]; + } + else if (args[i].compare("-src") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "sourceroot missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + bSrcOption = true; + sourceRoot = fs::path(args[i], fs::native); + } + else if (args[i].compare("-compact") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "compactStylesheet missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + compactStylesheet = fs::path(args[i], fs::native); + } + else if (args[i].compare("-sty") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "embeddingStylesheet missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + embeddStylesheet = fs::path(args[i], fs::native); + } + else if (args[i].compare("-zipdir") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "idxtemp missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + zipdir = fs::path(args[i], fs::native); + } + else if (args[i].compare("-idxcaption") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "idxcaption stylesheet missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + idxCaptionStylesheet = fs::path(args[i], fs::native); + } + else if 
(args[i].compare("-idxcontent") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "idxcontent stylesheet missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + idxContentStylesheet = fs::path(args[i], fs::native); + } + else if (args[i].compare("-o") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "outputfilename missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + outputFile = fs::path(args[i], fs::native); + } + else if (args[i].compare("-mod") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "module name missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + module = args[i]; + } + else if (args[i].compare("-lang") == 0) + { + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "language name missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + lang = args[i]; + } + else if (args[i].compare("-hid") == 0) + { + ++i; + throw HelpProcessingException( HelpProcessingErrorClass::General, "obsolete -hid argument used" ); + } + else if (args[i].compare("-add") == 0) + { + std::string addFile, addFileUnderPath; + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "pathname missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + addFileUnderPath = args[i]; + ++i; + if (i >= args.size()) + { + std::stringstream aStrStream; + aStrStream << "pathname missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + addFile = args[i]; + if (!addFileUnderPath.empty() && !addFile.empty()) + additionalFiles[addFileUnderPath] 
= addFile; + } + else if (args[i].compare("-nolangroot") == 0) + m_bUseLangRoot = false; + else if (args[i].compare("-noindex") == 0) + m_bCreateIndex = false; + else + helpFiles.push_back(args[i]); + ++i; + } + + //We can be called from the helplinker executable or the extension manager + //In the latter case extsource is not used. + if( (pExtensionPath && pExtensionPath->length() > 0 && pOfficeHelpPath) + || !extsource.empty()) + { + bExtensionMode = true; + if (!extsource.empty()) + { + //called from helplinker.exe, pExtensionPath and pOfficeHelpPath + //should be NULL + sourceRoot = fs::path(extsource, fs::native); + extensionPath = sourceRoot.toUTF8(); + + if (extdestination.empty()) + { + std::stringstream aStrStream; + aStrStream << "-extlangdest is missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + else + { + //Convert from system path to file URL!!! + fs::path p(extdestination, fs::native); + extensionDestination = p.toUTF8(); + } + } + else + { //called from extension manager + extensionPath = *pExtensionPath; + sourceRoot = fs::path(extensionPath); + extensionDestination = *pDestination; + } + //check if -src option was used. This option must not be used + //when extension help is compiled. 
+ if (bSrcOption) + { + std::stringstream aStrStream; + aStrStream << "-src must not be used together with -extsource missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + } + + if (!bExtensionMode && zipdir.empty()) + { + std::stringstream aStrStream; + aStrStream << "no index dir given" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + + if ( (!bExtensionMode && idxCaptionStylesheet.empty()) + || (!extsource.empty() && idxCaptionStylesheet.empty()) ) + { + //No extension mode and extension mode using commandline + //!extsource.empty indicates extension mode using commandline + // -idxcaption parameter is required + std::stringstream aStrStream; + aStrStream << "no index caption stylesheet given" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + else if ( bExtensionMode && extsource.empty()) + { + //This part is used when compileExtensionHelp is called from the extensions manager. 
+ //If extension help is compiled using helplinker in the build process + OUString aIdxCaptionPathFileURL = *pOfficeHelpPath + "/idxcaption.xsl"; + + OString aOStr_IdxCaptionPathFileURL( OUStringToOString + ( aIdxCaptionPathFileURL, osl_getThreadTextEncoding() ) ); + std::string aStdStr_IdxCaptionPathFileURL( aOStr_IdxCaptionPathFileURL ); + + idxCaptionStylesheet = fs::path( aStdStr_IdxCaptionPathFileURL ); + } + + if ( (!bExtensionMode && idxContentStylesheet.empty()) + || (!extsource.empty() && idxContentStylesheet.empty()) ) + { + //No extension mode and extension mode using commandline + //!extsource.empty indicates extension mode using commandline + // -idxcontent parameter is required + std::stringstream aStrStream; + aStrStream << "no index content stylesheet given" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + else if ( bExtensionMode && extsource.empty()) + { + //If extension help is compiled using helplinker in the build process + //then -idxcontent must be supplied + //This part is used when compileExtensionHelp is called from the extensions manager. 
+ OUString aIdxContentPathFileURL = *pOfficeHelpPath + "/idxcontent.xsl"; + + OString aOStr_IdxContentPathFileURL( OUStringToOString + ( aIdxContentPathFileURL, osl_getThreadTextEncoding() ) ); + std::string aStdStr_IdxContentPathFileURL( aOStr_IdxContentPathFileURL ); + + idxContentStylesheet = fs::path( aStdStr_IdxContentPathFileURL ); + } + if (!bExtensionMode && embeddStylesheet.empty()) + { + std::stringstream aStrStream; + aStrStream << "no embedding resolving file given" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + if (sourceRoot.empty()) + { + std::stringstream aStrStream; + aStrStream << "no sourceroot given" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + if (!bExtensionMode && outputFile.empty()) + { + std::stringstream aStrStream; + aStrStream << "no output file given" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + if (module.empty()) + { + std::stringstream aStrStream; + aStrStream << "module missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + if (!bExtensionMode && lang.empty()) + { + std::stringstream aStrStream; + aStrStream << "language missing" << std::endl; + throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() ); + } + link(); +} + +// Variable to set an exception in "C" StructuredXMLErrorFunction +static const HelpProcessingException* GpXMLParsingException = nullptr; + +extern "C" { + +#if LIBXML_VERSION >= 21200 +static void StructuredXMLErrorFunction(SAL_UNUSED_PARAMETER void *, const xmlError* error) +#else +static void StructuredXMLErrorFunction(SAL_UNUSED_PARAMETER void *, xmlErrorPtr error) +#endif +{ + std::string aXMLParsingFile; + if( error->file != nullptr ) + aXMLParsingFile = error->file; + int nXMLParsingLine = error->line; + GpXMLParsingException = new 
HelpProcessingException(error->message, aXMLParsingFile, nXMLParsingLine); + + // Reset error handler + xmlSetStructuredErrorFunc( nullptr, nullptr ); +} + +} + +HelpProcessingErrorInfo& HelpProcessingErrorInfo::operator=( const struct HelpProcessingException& e ) +{ + m_eErrorClass = e.m_eErrorClass; + m_aErrorMsg = OStringToOUString( std::string_view(e.m_aErrorMsg), osl_getThreadTextEncoding() ); + m_aXMLParsingFile = OStringToOUString( std::string_view(e.m_aXMLParsingFile), osl_getThreadTextEncoding() ); + m_nXMLParsingLine = e.m_nXMLParsingLine; + return *this; +} + + +// Returns true in case of success, false in case of error +bool compileExtensionHelp +( + const OUString& aOfficeHelpPath, + std::u16string_view aExtensionName, + std::u16string_view aExtensionLanguageRoot, + sal_Int32 nXhpFileCount, const OUString* pXhpFiles, + std::u16string_view aDestination, + HelpProcessingErrorInfo& o_rHelpProcessingErrorInfo +) +{ + bool bSuccess = true; + + std::vector<std::string> args; + args.reserve(nXhpFileCount + 2); + args.push_back(std::string("-mod")); + OString aOExtensionName = OUStringToOString( aExtensionName, osl_getThreadTextEncoding() ); + args.push_back(std::string(aOExtensionName)); + + for( sal_Int32 iXhp = 0 ; iXhp < nXhpFileCount ; ++iXhp ) + { + OUString aXhpFile = pXhpFiles[iXhp]; + + OString aOXhpFile = OUStringToOString( aXhpFile, osl_getThreadTextEncoding() ); + args.push_back(std::string(aOXhpFile)); + } + + OString aOExtensionLanguageRoot = OUStringToOString( aExtensionLanguageRoot, osl_getThreadTextEncoding() ); + const char* pExtensionPath = aOExtensionLanguageRoot.getStr(); + std::string aStdStrExtensionPath = pExtensionPath; + OString aODestination = OUStringToOString(aDestination, osl_getThreadTextEncoding()); + const char* pDestination = aODestination.getStr(); + std::string aStdStrDestination = pDestination; + + // Set error handler + xmlSetStructuredErrorFunc( nullptr, StructuredXMLErrorFunction ); + try + { + HelpLinker aHelpLinker; + 
aHelpLinker.main( args, &aStdStrExtensionPath, &aStdStrDestination, &aOfficeHelpPath ); + } + catch( const HelpProcessingException& e ) + { + if( GpXMLParsingException != nullptr ) + { + o_rHelpProcessingErrorInfo = *GpXMLParsingException; + delete GpXMLParsingException; + GpXMLParsingException = nullptr; + } + else + { + o_rHelpProcessingErrorInfo = e; + } + bSuccess = false; + } + // Reset error handler + xmlSetStructuredErrorFunc( nullptr, nullptr ); + + // i83624: Tree files + // The following basically checks if the help.tree is well formed XML. + // Apparently there have been cases when translations contained + // non-well-formed XML in the past. + OUString aTreeFileURL = OUString::Concat(aExtensionLanguageRoot) + "/help.tree"; + osl::DirectoryItem aTreeFileItem; + osl::FileBase::RC rcGet = osl::DirectoryItem::get( aTreeFileURL, aTreeFileItem ); + osl::FileStatus aFileStatus( osl_FileStatus_Mask_FileSize ); + if( rcGet == osl::FileBase::E_None && + aTreeFileItem.getFileStatus( aFileStatus ) == osl::FileBase::E_None && + aFileStatus.isValid( osl_FileStatus_Mask_FileSize ) ) + { + sal_uInt64 ret, len = aFileStatus.getFileSize(); + std::unique_ptr<char[]> s(new char[ int(len) ]); // the buffer to hold the installed files + osl::File aFile( aTreeFileURL ); + (void)aFile.open( osl_File_OpenFlag_Read ); + aFile.read( s.get(), len, ret ); + aFile.close(); + + XML_Parser parser = XML_ParserCreate( nullptr ); + XML_Status parsed = XML_Parse( parser, s.get(), int( len ), true ); + + if (XML_STATUS_ERROR == parsed) + { + XML_Error nError = XML_GetErrorCode( parser ); + o_rHelpProcessingErrorInfo.m_eErrorClass = HelpProcessingErrorClass::XmlParsing; + o_rHelpProcessingErrorInfo.m_aErrorMsg = OUString::createFromAscii( XML_ErrorString( nError ) ); + o_rHelpProcessingErrorInfo.m_aXMLParsingFile = aTreeFileURL; + // CRASHES!!! 
o_rHelpProcessingErrorInfo.m_nXMLParsingLine = XML_GetCurrentLineNumber( parser ); + bSuccess = false; + } + + XML_ParserFree( parser ); + } + + return bSuccess; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/HelpLinker_main.cxx b/helpcompiler/source/HelpLinker_main.cxx new file mode 100644 index 0000000000..70ff1f6523 --- /dev/null +++ b/helpcompiler/source/HelpLinker_main.cxx @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <HelpCompiler.hxx> +#include <HelpLinker.hxx> +#include <sal/main.h> +#include <iostream> + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + std::vector<std::string> args; + for (int i = 1; i < argc; ++i) + args.push_back(std::string(argv[i])); + try + { + HelpLinker aHelpLinker; + aHelpLinker.main(args); + } + catch (const HelpProcessingException& e) + { + std::cerr << e.m_aErrorMsg; + exit(1); + } + catch (const std::exception& e) + { + std::cerr << e.what(); + exit(1); + } + return 0; +} diff --git a/helpcompiler/source/HelpSearch.cxx b/helpcompiler/source/HelpSearch.cxx new file mode 100644 index 0000000000..982e672c45 --- /dev/null +++ b/helpcompiler/source/HelpSearch.cxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <helpcompiler/HelpSearch.hxx> +#include <osl/file.hxx> +#include <osl/thread.hxx> + +#include "LuceneHelper.hxx" +#include <CLucene.h> + +HelpSearch::HelpSearch(OUString const &indexDir) +{ + OUString ustrSystemPath; + osl::File::getSystemPathFromFileURL(indexDir, ustrSystemPath); + d_indexDir = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding()); +} + +void HelpSearch::query(OUString const &queryStr, bool captionOnly, + std::vector<OUString> &rDocuments, std::vector<float> &rScores) { + + lucene::index::IndexReader *reader = lucene::index::IndexReader::open(d_indexDir.getStr()); + lucene::search::IndexSearcher searcher(reader); + + const TCHAR* field = captionOnly ? 
L"caption" : L"content"; + + bool isWildcard = queryStr[queryStr.getLength() - 1] == L'*'; + std::vector<TCHAR> aQueryStr(OUStringToTCHARVec(queryStr)); + lucene::search::Query *pQuery; + if (isWildcard) + pQuery = _CLNEW lucene::search::WildcardQuery(_CLNEW lucene::index::Term(field, aQueryStr.data())); + else + pQuery = _CLNEW lucene::search::TermQuery(_CLNEW lucene::index::Term(field, aQueryStr.data())); + + lucene::search::Hits *hits = searcher.search(pQuery); + for (size_t i = 0; i < hits->length(); ++i) { + lucene::document::Document &doc = hits->doc(i); // Document* belongs to Hits. + wchar_t const *path = doc.get(L"path"); + rDocuments.push_back(TCHARArrayToOUString(path != nullptr ? path : L"")); + rScores.push_back(hits->score(i)); + } + + _CLDELETE(hits); + _CLDELETE(pQuery); + + reader->close(); + _CLDELETE(reader); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/LuceneHelper.cxx b/helpcompiler/source/LuceneHelper.cxx new file mode 100644 index 0000000000..d6a2de2523 --- /dev/null +++ b/helpcompiler/source/LuceneHelper.cxx @@ -0,0 +1,39 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include "LuceneHelper.hxx" + +std::vector<TCHAR> OUStringToTCHARVec(OUString const& rStr) +{ + //UTF-16 + if (sizeof(TCHAR) == sizeof(sal_Unicode)) + return std::vector<TCHAR>(rStr.getStr(), rStr.getStr() + rStr.getLength() + 1); + + //UTF-32 + std::vector<TCHAR> aRet; + for (sal_Int32 nStrIndex = 0; nStrIndex < rStr.getLength();) + { + const sal_uInt32 nCode = rStr.iterateCodePoints(&nStrIndex); + aRet.push_back(nCode); + } + aRet.push_back(0); + return aRet; +} + +OUString TCHARArrayToOUString(TCHAR const* str) +{ + // UTF-16 + if (sizeof(TCHAR) == sizeof(sal_Unicode)) + return OUString(reinterpret_cast<const sal_Unicode*>(str)); + + // UTF-32 + return OUString(reinterpret_cast<const sal_uInt32*>(str), wcslen(str)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/helpcompiler/source/LuceneHelper.hxx b/helpcompiler/source/LuceneHelper.hxx new file mode 100644 index 0000000000..6e8208b1b2 --- /dev/null +++ b/helpcompiler/source/LuceneHelper.hxx @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +#pragma once + +#if defined(__GNUC__) +#pragma GCC visibility push(default) +#endif + +#include <CLucene.h> + +#if defined(__GNUC__) +#pragma GCC visibility pop +#endif + +#include <rtl/ustring.hxx> +#include <vector> + +std::vector<TCHAR> OUStringToTCHARVec(OUString const& rStr); +OUString TCHARArrayToOUString(TCHAR const* str); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |