summaryrefslogtreecommitdiffstats
path: root/helpcompiler/source
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:54:39 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:54:39 +0000
commit267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree358c9467650e1d0a1d7227a21dac2e3d08b622b2 /helpcompiler/source
parentInitial commit. (diff)
downloadlibreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'helpcompiler/source')
-rw-r--r--helpcompiler/source/BasCodeTagger.cxx230
-rw-r--r--helpcompiler/source/HelpCompiler.cxx508
-rw-r--r--helpcompiler/source/HelpIndexer.cxx210
-rw-r--r--helpcompiler/source/HelpIndexer_main.cxx92
-rw-r--r--helpcompiler/source/HelpLinker.cxx942
-rw-r--r--helpcompiler/source/HelpLinker_main.cxx46
-rw-r--r--helpcompiler/source/HelpSearch.cxx55
-rw-r--r--helpcompiler/source/LuceneHelper.cxx39
-rw-r--r--helpcompiler/source/LuceneHelper.hxx27
9 files changed, 2149 insertions, 0 deletions
diff --git a/helpcompiler/source/BasCodeTagger.cxx b/helpcompiler/source/BasCodeTagger.cxx
new file mode 100644
index 0000000000..3511617f4d
--- /dev/null
+++ b/helpcompiler/source/BasCodeTagger.cxx
@@ -0,0 +1,230 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <BasCodeTagger.hxx>
+#include <rtl/ustring.hxx>
+#include <iostream>
+
+//! Starts a walk at the document's root element and advances once with nextNode().
+/*!
+ * Throws BasicCodeTagger::NULL_DOCUMENT when no document is given and
+ * BasicCodeTagger::EMPTY_DOCUMENT when the document has no root element.
+ */
+LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc )
+{
+    if ( !doc )
+        throw BasicCodeTagger::NULL_DOCUMENT;
+
+    m_pCurrentNode = xmlDocGetRootElement( doc );
+    if ( !m_pCurrentNode )
+        throw BasicCodeTagger::EMPTY_DOCUMENT;
+
+    // Queue the root's child list (if any) before stepping off the root.
+    xmlNodePtr const pFirstChild = m_pCurrentNode->xmlChildrenNode;
+    if ( pFirstChild )
+        m_Queue.push_back( pFirstChild );
+    nextNode();
+}
+
+//! Advances the walker to the next node.
+/*!
+ * The next sibling of the current node is preferred; when there is none, the
+ * walk continues with the oldest queued child list. The child list of the
+ * node that becomes current is queued for a later visit.
+ *
+ * When there is neither a next sibling nor a queued child list the walker
+ * stays where it is, instead of reading from an empty queue.
+ */
+void LibXmlTreeWalker::nextNode()
+{
+    //next node
+    if ( m_pCurrentNode->next != nullptr )
+        m_pCurrentNode = m_pCurrentNode->next;
+    else if ( !m_Queue.empty() )
+    {
+        m_pCurrentNode = m_Queue.front();
+        m_Queue.pop_front();
+    }
+    else
+        return; // nothing left to visit; front() on an empty queue would be undefined
+
+    //queue children if they exist
+    if ( m_pCurrentNode->xmlChildrenNode != nullptr )
+        m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
+}
+
+//! Removes the most recently queued child list when the current node has children.
+void LibXmlTreeWalker::ignoreCurrNodesChildren()
+{
+    const bool bHasChildren = ( m_pCurrentNode->xmlChildrenNode != nullptr );
+    if ( !bHasChildren )
+        return;
+    m_Queue.pop_back();
+}
+
+//! Returns true when the current node has no next sibling and no child list is queued.
+bool LibXmlTreeWalker::end() const
+{
+    if ( m_pCurrentNode->next != nullptr )
+        return false;
+    return m_Queue.empty();
+}
+
+
+//! Prepares a tagger for rootDoc; throws NULL_DOCUMENT when rootDoc is null.
+BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc ):
+    m_Highlighter(HighlighterLanguage::Basic)
+{
+    if ( !rootDoc )
+        throw NULL_DOCUMENT;
+
+    // No walker yet and nothing tagged so far.
+    m_bTaggingCompleted = false;
+    m_pXmlTreeWalker = nullptr;
+    m_pDocument = rootDoc;
+}
+
+//! The destructor body has no statements of its own.
+BasicCodeTagger::~BasicCodeTagger() = default;
+//! Gathers all the <bascode> tag nodes from the xml tree.
+/*!
+ * Assumes m_pDocument is valid. Creates a fresh m_pXmlTreeWalker and refills
+ * m_BasicCodeContainerTags from scratch.
+ */
+void BasicCodeTagger::getBasicCodeContainerNodes()
+{
+    const xmlChar* const pBascode = reinterpret_cast<const xmlChar*>("bascode");
+
+    m_BasicCodeContainerTags.clear();
+    m_pXmlTreeWalker.reset(new LibXmlTreeWalker( m_pDocument ));
+
+    // The node the walker starts on is only checked; its children stay queued.
+    xmlNodePtr pNode = m_pXmlTreeWalker->currentNode();
+    if ( xmlStrcmp( pNode->name, pBascode ) == 0 )
+        m_BasicCodeContainerTags.push_back( pNode ); //it goes to the end of the list
+
+    while ( !m_pXmlTreeWalker->end() )
+    {
+        m_pXmlTreeWalker->nextNode();
+        pNode = m_pXmlTreeWalker->currentNode();
+        if ( xmlStrcmp( pNode->name, pBascode ) != 0 )
+            continue;
+        //Found <bascode>: remember it and do not walk into its children
+        m_BasicCodeContainerTags.push_back( pNode );
+        m_pXmlTreeWalker->ignoreCurrNodesChildren();
+    }
+}
+
+//! Tags the Basic code contained in the gathered <bascode> tags.
+/*!
+ * For each gathered <bascode> node this method hands every child node to
+ * tagParagraph(), which "inserts" <item> tags according to the Basic code
+ * syntax found in that paragraph. The gathered list is emptied afterwards.
+ */
+void BasicCodeTagger::tagBasCodeParagraphs()
+{
+    for (auto const& pBascode : m_BasicCodeContainerTags)
+    {
+        // start at the first <paragraph> and follow the sibling chain
+        for ( xmlNodePtr pPara = pBascode->xmlChildrenNode; pPara != nullptr; pPara = pPara->next )
+            tagParagraph( pPara );
+    }
+    m_BasicCodeContainerTags.clear();
+}
+
+//! Used by tagBasCodeParagraphs(). Rebuilds one paragraph's content from highlighted tokens.
+/*!
+ * The paragraph's text is read, all of its children are removed, and the text
+ * is added back token by token: whitespace as plain text, every other token
+ * wrapped in an <item type="..."> element. A paragraph without text is left
+ * untouched.
+ */
+void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph )
+{
+    //1. get paragraph text; no text, nothing more to do here
+    xmlChar* const pSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 );
+    if ( !pSnippet )
+        return;
+
+    //2. delete every child from paragraph (except attributes)
+    for ( xmlNodePtr pChild = paragraph->xmlChildrenNode; pChild != nullptr; )
+    {
+        xmlNodePtr const pNext = pChild->next; // fetch before the node is freed
+        xmlUnlinkNode( pChild );
+        xmlFreeNode( pChild );
+        pChild = pNext;
+    }
+
+    //3. create new paragraph content
+    const char* const pUtf8 = reinterpret_cast<const char*>(pSnippet);
+    OUString strLine( pUtf8, strlen(pUtf8), RTL_TEXTENCODING_UTF8 );
+    std::vector<HighlightPortion> portions;
+    m_Highlighter.getHighlightPortions( strLine, portions );
+    for (auto const& portion : portions)
+    {
+        const OString sToken(OUStringToOString(
+            strLine.subView(portion.nBegin, portion.nEnd-portion.nBegin), RTL_TEXTENCODING_UTF8));
+        xmlNodePtr const pText = xmlNewText(reinterpret_cast<const xmlChar*>(sToken.getStr()));
+        if ( portion.tokenType == TokenType::Whitespace )
+        {
+            // whitespace goes directly below the paragraph
+            xmlAddChild( paragraph, pText );
+            continue;
+        }
+        // any other token is wrapped in <item type="...">
+        xmlChar* const pType = getTypeString( portion.tokenType );
+        xmlNodePtr const pItem = xmlNewTextChild( paragraph, nullptr, reinterpret_cast<xmlChar const *>("item"), nullptr );
+        xmlNewProp( pItem, reinterpret_cast<xmlChar const *>("type"), pType );
+        xmlAddChild( pItem, pText );
+        xmlFree( pType );
+    }
+    xmlFree( pSnippet );
+}
+
+//! Manages tagging process.
+/*!
+ * This is the "main" function of BasicCodeTagger. Once tagging has completed,
+ * further calls return without doing anything.
+ */
+void BasicCodeTagger::tagBasicCodes()
+{
+    if ( !m_bTaggingCompleted )
+    {
+        //gather <bascode> nodes; a tagger exception is printed and tagging carries on
+        try
+        {
+            getBasicCodeContainerNodes();
+        }
+        catch (TaggerException &ex)
+        {
+            std::cout << "BasCodeTagger error occurred. Error code:" << ex << std::endl;
+        }
+
+        //tag basic code paragraphs in <bascode> tag
+        tagBasCodeParagraphs();
+        m_bTaggingCompleted = true;
+    }
+}
+
+//! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
+/*!
+ * The returned string is a copy made with xmlCharStrdup(). Token types
+ * without a name of their own are reported as "unknown".
+ */
+xmlChar* BasicCodeTagger::getTypeString( TokenType tokenType )
+{
+    const char* pName = "unknown"; // also the name of TokenType::Unknown
+    switch ( tokenType )
+    {
+        case TokenType::Identifier : pName = "identifier"; break;
+        case TokenType::Whitespace : pName = "whitespace"; break;
+        case TokenType::Number :     pName = "number";     break;
+        case TokenType::String :     pName = "string";     break;
+        case TokenType::EOL :        pName = "eol";        break;
+        case TokenType::Comment :    pName = "comment";    break;
+        case TokenType::Error :      pName = "error";      break;
+        case TokenType::Operator :   pName = "operator";   break;
+        case TokenType::Keywords :   pName = "keyword";    break;
+        case TokenType::Parameter :  pName = "parameter";  break;
+        default :                    break;
+    }
+    return xmlCharStrdup( pName );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/HelpCompiler.cxx b/helpcompiler/source/HelpCompiler.cxx
new file mode 100644
index 0000000000..120fb702f0
--- /dev/null
+++ b/helpcompiler/source/HelpCompiler.cxx
@@ -0,0 +1,508 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <algorithm>
+#include <memory>
+#include <HelpCompiler.hxx>
+#include <BasCodeTagger.hxx>
+#include <iostream>
+#include <stdlib.h>
+#include <string.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/transform.h>
+#include <rtl/character.hxx>
+#include <sal/log.hxx>
+#include <utility>
+
+// Stores the compile parameters, switches libxml2's keep-blanks default off and
+// derives `gui` from the OS environment variable ("WNT" -> "WIN",
+// "MACOSX" -> "MAC", anything else -> "UNIX"). Without an OS variable `gui`
+// is not assigned here.
+HelpCompiler::HelpCompiler(StreamTable &in_streamTable, fs::path in_inputFile,
+    fs::path in_src, fs::path in_zipdir, fs::path in_resCompactStylesheet,
+    fs::path in_resEmbStylesheet, std::string in_module, std::string in_lang,
+    bool in_bExtensionMode)
+    : streamTable(in_streamTable)
+    , inputFile(std::move(in_inputFile))
+    , src(std::move(in_src))
+    , zipdir(std::move(in_zipdir))
+    , module(std::move(in_module))
+    , lang(std::move(in_lang))
+    , resCompactStylesheet(std::move(in_resCompactStylesheet))
+    , resEmbStylesheet(std::move(in_resEmbStylesheet))
+    , bExtensionMode( in_bExtensionMode )
+{
+    xmlKeepBlanksDefaultValue = 0;
+
+    const char* const pOs = getenv("OS");
+    if (!pOs)
+        return;
+
+    if (strcmp(pOs, "WNT") == 0)
+        gui = "WIN";
+    else if (strcmp(pOs, "MACOSX") == 0)
+        gui = "MAC";
+    else
+        gui = "UNIX";
+}
+
+// Runs a BasicCodeTagger over doc. A tagger exception equal to EMPTY_DOCUMENT
+// is ignored; any other tagger exception is rethrown to the caller.
+void HelpCompiler::tagBasicCodeExamples( xmlDocPtr doc )
+{
+    try
+    {
+        BasicCodeTagger aTagger( doc );
+        aTagger.tagBasicCodes();
+    }
+    catch ( BasicCodeTagger::TaggerException &ex )
+    {
+        const bool bEmptyDocument = ( ex == BasicCodeTagger::EMPTY_DOCUMENT );
+        if ( !bEmptyDocument )
+            throw;
+    }
+}
+
+// Applies the compacting stylesheet to doc and returns whatever
+// xsltApplyStylesheet() yields. The stylesheet is parsed from
+// resCompactStylesheet on first use and kept in a function-local static.
+xmlDocPtr HelpCompiler::compactXhpForJar( xmlDocPtr doc )
+{
+    static xsltStylesheetPtr compact = nullptr;
+    static const char *params[2 + 1];
+    params[0] = nullptr; // empty parameter list
+
+    if (compact == nullptr)
+    {
+        const std::string sStylesheet = resCompactStylesheet.native_file_string();
+        compact = xsltParseStylesheetFile(BAD_CAST(sStylesheet.c_str()));
+    }
+
+    return xsltApplyStylesheet(compact, doc, params);
+}
+
+// Writes the compacted form of doc below the zip directory, at the position
+// filePath has relative to the source directory. Files under text/sbasic get
+// their Basic code examples tagged first; files under text/shared are written
+// to the "shared" variant of the ziptmp directory.
+void HelpCompiler::saveXhpForJar( xmlDocPtr doc, const fs::path &filePath )
+{
+    //save processed xhp document in ziptmp<module>_<lang>/text directory
+#ifdef _WIN32
+    const std::string pathSep = "\\";
+#else
+    const std::string pathSep = "/";
+#endif
+    const std::string sourceXhpPath( filePath.native_file_string() );
+    std::string zipdirPath( zipdir.native_file_string() );
+    const std::string srcdirPath( src.native_file_string() );
+
+    // srcdirPath contains trailing /, but we want the file path with / at the beginning
+    const std::string relPath = sourceXhpPath.substr( srcdirPath.length() - 1 );
+    const std::string::size_type lastSep = relPath.rfind( pathSep );
+    const std::string xhpFileName = relPath.substr( lastSep + 1 );
+    const std::string jarXhpPath = relPath.substr( 0, lastSep );
+
+    if ( jarXhpPath.compare( 1, 11, "text" + pathSep + "sbasic" ) == 0 )
+        tagBasicCodeExamples( doc );
+
+    if ( jarXhpPath.compare( 1, 11, "text" + pathSep + "shared" ) == 0 )
+    {
+        // replace the module name that follows "ziptmp" with "shared"
+        const size_t pos = zipdirPath.find( "ziptmp" );
+        if ( pos != std::string::npos )
+            zipdirPath.replace( pos + 6, module.length(), "shared" );
+    }
+
+    xmlDocPtr compacted = compactXhpForJar( doc );
+    const std::string targetDir = zipdirPath + jarXhpPath;
+    fs::create_directory( fs::path( targetDir, fs::native ) );
+    const std::string targetFile = targetDir + pathSep + xhpFileName;
+    if ( -1 == xmlSaveFormatFileEnc( targetFile.c_str(), compacted, "utf-8", 0 ) )
+        std::cerr << "Error saving file to " << targetFile.c_str() << std::endl;
+    xmlFreeDoc(compacted);
+}
+
+// Parses the help source file at filePath and returns the document to compile.
+//
+// In extension mode the parsed file is returned as is. Otherwise (build time)
+// the parsed file is first saved in compacted form via saveXhpForJar() and
+// then transformed with the embedding stylesheet, which receives the source
+// root as its "fsroot" parameter; the transformed document is returned.
+//
+// Returns nullptr when the file cannot be parsed. In that case nothing is
+// saved or transformed, so the caller can report the unreadable file.
+xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath)
+{
+    if (bExtensionMode)
+    {
+        // this is the mode when used within LibreOffice for importing help
+        // bundled with an extension
+        return xmlParseFile(filePath.native_file_string().c_str());
+    }
+
+    // this is the mode when used at build time to generate LibreOffice
+    // help from its xhp source
+    static xsltStylesheetPtr cur = nullptr;
+    static const char *params[2 + 1];
+    if (!cur)
+    {
+        // static: params keeps pointing into this string across calls
+        static std::string fsroot('\'' + src.toUTF8() + '\'');
+
+        cur = xsltParseStylesheetFile(BAD_CAST(resEmbStylesheet.native_file_string().c_str()));
+
+        int nbparams = 0;
+        params[nbparams++] = "fsroot";
+        params[nbparams++] = fsroot.c_str();
+        params[nbparams] = nullptr;
+    }
+
+    xmlDocPtr doc = xmlParseFile(filePath.native_file_string().c_str());
+    if (!doc)
+    {
+        // Do not hand a null document to the tagger, the compactor or the
+        // stylesheet; the null result tells the caller that parsing failed.
+        return nullptr;
+    }
+
+    saveXhpForJar( doc, filePath );
+
+    xmlDocPtr res = xsltApplyStylesheet(cur, doc, params);
+    xmlFreeDoc(doc);
+    return res;
+}
+
+// returns a node representing the whole stuff compiled for the current
+// application.
+//
+// The result is a copy of `node` whose children are rebuilt recursively.
+// <switch>/<switchinline> children are not copied themselves; they are
+// replaced by the content of the case selected by `gui` (select="sys") or by
+// `appl` (select="appl"). All other children are cloned recursively.
+xmlNodePtr HelpCompiler::clone(xmlNodePtr node, const std::string& appl)
+{
+ // NOTE(review): mode 2 of xmlCopyNode is expected to copy the node without
+ // its children (properties and namespaces only) - confirm against libxml2.
+ xmlNodePtr root = xmlCopyNode(node, 2);
+ if (node->xmlChildrenNode)
+ {
+ xmlNodePtr list = node->xmlChildrenNode;
+ while (list)
+ {
+ if (strcmp(reinterpret_cast<const char*>(list->name), "switchinline") == 0 || strcmp(reinterpret_cast<const char*>(list->name), "switch") == 0)
+ {
+ // tmp is the value a case's select attribute has to match; it stays
+ // empty when the switch selects on anything but "sys" or "appl"
+ std::string tmp;
+ xmlChar * prop = xmlGetProp(list, reinterpret_cast<xmlChar const *>("select"));
+ if (prop != nullptr)
+ {
+ if (strcmp(reinterpret_cast<char *>(prop), "sys") == 0)
+ {
+ tmp = gui;
+ }
+ else if (strcmp(reinterpret_cast<char *>(prop), "appl") == 0)
+ {
+ tmp = appl;
+ }
+ xmlFree(prop);
+ }
+ // with an empty tmp the whole switch contributes nothing to root
+ if (!tmp.empty())
+ {
+ // isCase records whether a matching case has been taken already
+ bool isCase=false;
+ xmlNodePtr caseList=list->xmlChildrenNode;
+ while (caseList)
+ {
+ xmlChar* select = xmlGetProp(caseList, BAD_CAST("select"));
+ if (select)
+ {
+ // only the first case whose select equals tmp is used; its
+ // children (not the case element itself) are cloned into root
+ if (!strcmp(reinterpret_cast<char*>(select), tmp.c_str()) && !isCase)
+ {
+ isCase=true;
+ xmlNodePtr clp = caseList->xmlChildrenNode;
+ while (clp)
+ {
+ xmlAddChild(root, clone(clp, appl));
+ clp = clp->next;
+ }
+ }
+ xmlFree(select);
+ }
+ else
+ {
+ // child without select attribute: anything that is not a
+ // <default>/<defaultinline> is cloned as a whole
+ if ((strcmp(reinterpret_cast<const char*>(caseList->name), "defaultinline") != 0) && (strcmp(reinterpret_cast<const char*>(caseList->name), "default") != 0))
+ {
+ xmlAddChild(root, clone(caseList, appl));
+ }
+ else
+ {
+ // the default's children are used only while no case has matched
+ if (!isCase)
+ {
+ xmlNodePtr clp = caseList->xmlChildrenNode;
+ while (clp)
+ {
+ xmlAddChild(root, clone(clp, appl));
+ clp = clp->next;
+ }
+ }
+ }
+ }
+ caseList = caseList->next;
+ }
+ }
+ }
+ else
+ {
+ // ordinary child: clone it recursively
+ xmlAddChild(root, clone(list, appl));
+ }
+ list = list->next;
+ }
+ }
+ return root;
+}
+
+namespace {
+
+// Holds what traverse() collects from a help document: document id, file
+// name and title, plus the owned hid list, keyword table and help text
+// table, which start out as freshly created, empty containers.
+class myparser
+{
+public:
+    std::string documentId;
+    std::string fileName;
+    std::string title;
+    std::unique_ptr< std::vector<std::string> > hidlist;
+    std::unique_ptr<Hashtable> keywords;
+    std::unique_ptr<Stringtable> helptexts;
+private:
+    // hids collected by traverse() since the last <ahelp> element
+    std::vector<std::string> extendedHelpText;
+public:
+    myparser(std::string indocumentId, std::string infileName, std::string intitle)
+        : documentId(std::move(indocumentId))
+        , fileName(std::move(infileName))
+        , title(std::move(intitle))
+        , hidlist(std::make_unique< std::vector<std::string> >())
+        , keywords(std::make_unique<Hashtable>())
+        , helptexts(std::make_unique<Stringtable>())
+    {
+    }
+    void traverse( xmlNodePtr parentNode );
+private:
+    std::string dump(xmlNodePtr node);
+};
+
+}
+
+// Returns the text found in node's subtree: the dumps of all children in
+// sibling order, followed by node's own content when node is a text node.
+// A text node for which libxml2 reports no content contributes nothing.
+std::string myparser::dump(xmlNodePtr node)
+{
+    std::string app;
+    for (xmlNodePtr child = node->xmlChildrenNode; child; child = child->next)
+        app += dump(child);
+
+    if (xmlNodeIsText(node))
+    {
+        xmlChar *pContent = xmlNodeGetContent(node);
+        // xmlNodeGetContent may return NULL; building a std::string from a
+        // null pointer is undefined behaviour
+        if (pContent)
+        {
+            app += reinterpret_cast<char*>(pContent);
+            xmlFree(pContent);
+        }
+    }
+    return app;
+}
+
+// Removes leading and trailing ' ' characters from str in place (no other
+// whitespace is touched). A string made up of spaces only becomes empty.
+static void trim(std::string& str)
+{
+    const std::string::size_type last = str.find_last_not_of(' ');
+    if (last == std::string::npos)
+    {
+        // nothing but spaces (or already empty)
+        str.clear();
+        return;
+    }
+
+    str.erase(last + 1);
+    const std::string::size_type first = str.find_first_not_of(' ');
+    if (first != std::string::npos)
+        str.erase(0, first);
+}
+
+// Walks all descendants of parentNode (depth first) and collects:
+// - fileName from the first <filename> element seen while fileName is empty,
+// - title from the first <title> element seen while title is empty,
+// - hids and index keywords from <bookmark> elements,
+// - help texts from <ahelp> elements.
+// Throws HelpProcessingException when a <bookmark> lacks its branch or id
+// attribute.
+void myparser::traverse( xmlNodePtr parentNode )
+{
+ // traverse all nodes that belong to the parent
+ xmlNodePtr test ;
+ for (test = parentNode->xmlChildrenNode; test; test = test->next)
+ {
+ if (fileName.empty() && !strcmp(reinterpret_cast<const char*>(test->name), "filename"))
+ {
+ // only a text node as first child supplies the file name
+ xmlNodePtr node = test->xmlChildrenNode;
+ if (xmlNodeIsText(node))
+ {
+ xmlChar *pContent = xmlNodeGetContent(node);
+ fileName = std::string(reinterpret_cast<char*>(pContent));
+ xmlFree(pContent);
+ }
+ }
+ else if (title.empty() && !strcmp(reinterpret_cast<const char*>(test->name), "title"))
+ {
+ // a title without any text is recorded as "<notitle>"
+ title = dump(test);
+ if (title.empty())
+ title = "<notitle>";
+ }
+ else if (!strcmp(reinterpret_cast<const char*>(test->name), "bookmark"))
+ {
+ // both attributes are mandatory on a bookmark
+ xmlChar* branchxml = xmlGetProp(test, BAD_CAST("branch"));
+ if (branchxml == nullptr) {
+ throw HelpProcessingException(
+ HelpProcessingErrorClass::XmlParsing, "bookmark lacks branch attribute");
+ }
+ std::string branch(reinterpret_cast<char*>(branchxml));
+ xmlFree (branchxml);
+ xmlChar* idxml = xmlGetProp(test, BAD_CAST("id"));
+ if (idxml == nullptr) {
+ throw HelpProcessingException(
+ HelpProcessingErrorClass::XmlParsing, "bookmark lacks id attribute");
+ }
+ std::string anchor(reinterpret_cast<char*>(idxml));
+ xmlFree (idxml);
+
+ // branch starting with "hid": the part after the first '/' is the help id
+ if (branch.compare(0, 3, "hid") == 0)
+ {
+ size_t index = branch.find('/');
+ if (index != std::string::npos)
+ {
+ auto hid = branch.substr(1 + index);
+ // one shall serve as a documentId
+ if (documentId.empty())
+ documentId = hid;
+ // remembered until the next <ahelp> element consumes the list
+ extendedHelpText.push_back(hid);
+ HCDBG(std::cerr << "hid pushback" << (anchor.empty() ? hid : hid + "#" + anchor) << std::endl);
+ hidlist->push_back( anchor.empty() ? hid : hid + "#" + anchor);
+ }
+ else
+ // no '/' in the branch: go on with the next sibling; this also
+ // skips the traversal of this bookmark's children below
+ continue;
+ }
+ else if (branch.compare("index") == 0)
+ {
+ // keywords of this bookmark, stored under its anchor when not empty
+ LinkedList ll;
+
+ for (xmlNodePtr nd = test->xmlChildrenNode; nd; nd = nd->next)
+ {
+ if (strcmp(reinterpret_cast<const char*>(nd->name), "bookmark_value"))
+ continue;
+
+ // the embedded attribute is compared after conversion with tocharlower
+ std::string embedded;
+ xmlChar* embeddedxml = xmlGetProp(nd, BAD_CAST("embedded"));
+ if (embeddedxml)
+ {
+ embedded = std::string(reinterpret_cast<char*>(embeddedxml));
+ xmlFree (embeddedxml);
+ std::transform (embedded.begin(), embedded.end(),
+ embedded.begin(), tocharlower);
+ }
+
+ // bookmark values marked embedded="true" are left out
+ bool isEmbedded = !embedded.empty() && embedded.compare("true") == 0;
+ if (isEmbedded)
+ continue;
+
+ // "pre ; post" is normalized to "pre;post": both sides of the first
+ // semicolon are trimmed
+ std::string keyword = dump(nd);
+ size_t keywordSem = keyword.find(';');
+ if (keywordSem != std::string::npos)
+ {
+ std::string tmppre =
+ keyword.substr(0,keywordSem);
+ trim(tmppre);
+ std::string tmppos =
+ keyword.substr(1+keywordSem);
+ trim(tmppos);
+ keyword = tmppre + ";" + tmppos;
+ }
+ ll.push_back(keyword);
+ }
+ if (!ll.empty())
+ (*keywords)[anchor] = ll;
+ }
+ else if (branch.compare("contents") == 0)
+ {
+ // currently not used
+ }
+ }
+ else if (!strcmp(reinterpret_cast<const char*>(test->name), "ahelp"))
+ {
+ //tool-tip: the element's text with newlines turned into spaces, trimmed
+ std::string text = dump(test);
+ std::replace(text.begin(), text.end(), '\n', ' ');
+ trim(text);
+
+ //tool-tip target
+ std::string hidstr("."); //. == previous seen hid bookmarks
+ xmlChar* hid = xmlGetProp(test, BAD_CAST("hid"));
+ if (hid)
+ {
+ hidstr = std::string(reinterpret_cast<char*>(hid));
+ xmlFree (hid);
+ }
+
+ if (hidstr != "." && !hidstr.empty()) //simple case of explicitly named target
+ {
+ assert(!hidstr.empty());
+ (*helptexts)[hidstr] = text;
+ }
+ else //apply to list of "current" hids determined by recent bookmarks that have hid in their branch
+ {
+ //TODO: make these asserts and flush out all our broken help ids
+ SAL_WARN_IF(hidstr.empty(), "helpcompiler", "hid='' for text:" << text);
+ SAL_WARN_IF(!hidstr.empty() && extendedHelpText.empty(), "helpcompiler", "hid='.' with no hid bookmark branches in file: " << fileName + " for text: " << text);
+ for (const std::string& name : extendedHelpText)
+ {
+ (*helptexts)[name] = text;
+ }
+ }
+ // every <ahelp> resets the list of pending hids, whichever branch ran
+ extendedHelpText.clear();
+ }
+ // traverse children
+ traverse(test);
+ }
+}
+
+// Compiles inputFile into streamTable: loads the source document, resolves
+// its switches for the current application, collects ids, keywords and help
+// texts from the resolved tree and stores everything in streamTable.
+// Throws HelpProcessingException when the source document cannot be obtained.
+void HelpCompiler::compile()
+{
+    // we now have the jaroutputstream, which will contain the document.
+    // now determine the document as a dom tree in variable docResolved
+    xmlDocPtr docResolvedOrg = getSourceDocument(inputFile);
+
+    // now add path to the document
+    // resolve the dom
+    if (docResolvedOrg == nullptr)
+    {
+        std::stringstream aStrStream;
+        aStrStream << "ERROR: file not existing: " << inputFile.native_file_string().c_str() << std::endl;
+        throw HelpProcessingException( HelpProcessingErrorClass::General, aStrStream.str() );
+    }
+
+    // the application name is the module name minus its first character, in upper case
+    std::string appl = module.substr(1);
+    std::transform(appl.begin(), appl.end(), appl.begin(),
+        [](char c) { return static_cast<char>(rtl::toAsciiUpperCase(static_cast<unsigned char>(c))); });
+
+    // returns a clone of the document with switch-cases resolved
+    xmlNodePtr docResolved = clone(xmlDocGetRootElement(docResolvedOrg), appl);
+
+    // the parser starts with empty id, file name and title and fills them in
+    myparser aparser{ std::string(), std::string(), std::string() };
+    aparser.traverse(docResolved);
+    const std::string documentId = aparser.documentId;
+    const std::string fileName = aparser.fileName;
+    const std::string title = aparser.title;
+
+    HCDBG(std::cerr << documentId << " : " << fileName << " : " << title << std::endl);
+
+    xmlDocPtr docResolvedDoc = xmlCopyDoc(docResolvedOrg, false);
+    xmlDocSetRootElement(docResolvedDoc, docResolved);
+
+    streamTable.dropappl();
+    streamTable.appl_doc = docResolvedDoc;
+    streamTable.appl_hidlist = std::move(aparser.hidlist);
+    streamTable.appl_helptexts = std::move(aparser.helptexts);
+    streamTable.appl_keywords = std::move(aparser.keywords);
+
+    streamTable.document_path = fileName;
+    streamTable.document_title = title;
+
+    // outside extension mode a file name of the form "/text/<name>/..."
+    // makes <name> the document's module; otherwise it is `module`
+    std::string actMod = module;
+    const bool bTextPath = !bExtensionMode && !fileName.empty()
+        && fileName.compare(0, 6, "/text/") == 0;
+    if (bTextPath)
+    {
+        const std::string rest = fileName.substr(strlen("/text/"));
+        actMod = rest.substr(0, rest.find('/'));
+    }
+    streamTable.document_module = actMod;
+    xmlFreeDoc(docResolvedOrg);
+}
+
+namespace fs
+{
+    // Creates the directory path named by indexDirName through
+    // osl::Directory::createPath(); the result of that call is not examined.
+    void create_directory(const fs::path& indexDirName)
+    {
+        const auto& rDirUrl = indexDirName.data;
+        HCDBG(
+            std::cerr << "creating " <<
+            OUStringToOString(rDirUrl, RTL_TEXTENCODING_UTF8).getStr()
+            << std::endl
+        );
+        osl::Directory::createPath(rDirUrl);
+    }
+
+    // Copies src to dest through osl::File::copy(); the result of that call
+    // is not examined.
+    void copy(const fs::path &src, const fs::path &dest)
+    {
+        const auto& rFrom = src.data;
+        const auto& rTo = dest.data;
+        osl::File::copy(rFrom, rTo);
+    }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/HelpIndexer.cxx b/helpcompiler/source/HelpIndexer.cxx
new file mode 100644
index 0000000000..65e46743b4
--- /dev/null
+++ b/helpcompiler/source/HelpIndexer.cxx
@@ -0,0 +1,210 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <helpcompiler/HelpIndexer.hxx>
+
+#include <rtl/string.hxx>
+#include <rtl/uri.hxx>
+#include <o3tl/runtimetooustring.hxx>
+#include <osl/file.hxx>
+#include <osl/thread.h>
+#include <o3tl/string_view.hxx>
+#include <memory>
+#include <utility>
+
+#include "LuceneHelper.hxx"
+#include <CLucene.h>
+#include <CLucene/analysis/LanguageBasedAnalyzer.h>
+
+#if defined _WIN32
+#include <o3tl/char16_t2wchar_t.hxx>
+#include <prewin.h>
+#include <postwin.h>
+#endif
+
+using namespace lucene::document;
+
+// Remembers language and module and derives the three locations used later:
+// the index directory "<outDir>/<module>.idxl" and the "caption" and
+// "content" directories below srcDir. Each location is passed through
+// osl_getAbsoluteFileURL().
+HelpIndexer::HelpIndexer(OUString lang, OUString module,
+    std::u16string_view srcDir, std::u16string_view outDir)
+    : d_lang(std::move(lang)), d_module(std::move(module))
+{
+    // Rewrites rUrl in place with the result of osl_getAbsoluteFileURL().
+    const auto makeAbsolute = [](OUString& rUrl) {
+        osl_getAbsoluteFileURL(nullptr, rUrl.pData, &rUrl.pData);
+    };
+
+    d_indexDir = outDir + OUStringChar('/') + d_module + ".idxl";
+    makeAbsolute(d_indexDir);
+
+    d_captionDir = OUString::Concat(srcDir) + "/caption";
+    makeAbsolute(d_captionDir);
+
+    d_contentDir = OUString::Concat(srcDir) + "/content";
+    makeAbsolute(d_contentDir);
+}
+
+#if defined _WIN32
+namespace
+{
+// Calls constructor with ustrPath converted to the thread text encoding and
+// returns its result. When that call throws CLuceneError, the call is repeated
+// once with the short (8.3) form of the path; if no short path can be
+// obtained, the original CLuceneError is rethrown. An exception thrown by the
+// second call is not caught here.
+template <class Constructor>
+auto TryWithUnicodePathWorkaround(const OUString& ustrPath, const Constructor& constructor)
+{
+ const rtl_TextEncoding eThreadEncoding = osl_getThreadTextEncoding();
+ OString sPath = OUStringToOString(ustrPath, eThreadEncoding);
+ try
+ {
+ // First try path in thread encoding (ACP in case of Windows).
+ return constructor(sPath);
+ }
+ catch (const CLuceneError&)
+ {
+ // Maybe the path contains characters not representable in ACP. There's no API in lucene
+ // that takes Unicode strings (they take 8-bit strings, and pass them to CRT library
+ // functions without conversion).
+
+ // For a workaround, try short name, which should only contain ASCII characters. Would
+ // not help (i.e., would return original long name) if short (8.3) file name creation is
+ // disabled in OS or volume settings.
+ wchar_t buf[32767];
+ // a zero result means no short path was produced: rethrow the CLuceneError
+ if (GetShortPathNameW(o3tl::toW(ustrPath.getStr()), buf, std::size(buf)) == 0)
+ throw;
+ sPath = OUStringToOString(o3tl::toU(buf), eThreadEncoding);
+ return constructor(sPath);
+ }
+}
+}
+#endif
+
+// Builds the index for all files found by scanForFiles().
+// Returns false when scanning fails or when a CLuceneError is caught (its
+// message is then stored in d_error); returns true otherwise.
+bool HelpIndexer::indexDocuments()
+{
+ if (!scanForFiles())
+ return false;
+
+ try
+ {
+ // only the part of d_lang before the first '-' decides about the analyzer
+ std::u16string_view sLang = o3tl::getToken(d_lang, 0, '-');
+ bool bUseCJK = sLang == u"ja" || sLang == u"ko" || sLang == u"zh";
+
+ // Construct the analyzer appropriate for the given language
+ std::unique_ptr<lucene::analysis::Analyzer> analyzer;
+ if (bUseCJK)
+ analyzer.reset(new lucene::analysis::LanguageBasedAnalyzer(L"cjk"));
+ else
+ analyzer.reset(new lucene::analysis::standard::StandardAnalyzer());
+
+ OUString ustrSystemPath;
+ osl::File::getSystemPathFromFileURL(d_indexDir, ustrSystemPath);
+
+ // the writer is handed the raw analyzer pointer; analyzer is declared
+ // before writer in this scope
+#if defined _WIN32
+ // Make sure the path exists, or GetShortPathNameW (if attempted) will fail.
+ osl::Directory::createPath(d_indexDir);
+ auto writer = TryWithUnicodePathWorkaround(ustrSystemPath, [&analyzer](const OString& s) {
+ return std::make_unique<lucene::index::IndexWriter>(s.getStr(), analyzer.get(), true);
+ });
+#else
+ OString indexDirStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
+ auto writer = std::make_unique<lucene::index::IndexWriter>(indexDirStr.getStr(),
+ analyzer.get(), true);
+#endif
+
+#ifndef SYSTEM_CLUCENE
+ // avoid random values in index file, making help indices reproducible
+ writer->setSegmentInfoStartVersion(0);
+#endif
+
+ //Double limit of tokens allowed, otherwise we'll get a too-many-tokens
+ //exception for ja help. Could alternative ignore the exception and get
+ //truncated results as per java-Lucene apparently
+ writer->setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2);
+
+ // Index the identified help files
+ // one Document object is reused: filled, added and cleared per file
+ Document doc;
+ for (auto const& elem : d_files)
+ {
+ helpDocument(elem, &doc);
+ writer->addDocument(&doc);
+ doc.clear();
+ }
+
+ // Optimize the index
+ writer->optimize();
+ }
+ catch (CLuceneError &e)
+ {
+ d_error = o3tl::runtimeToOUString(e.what());
+ return false;
+ }
+
+ return true;
+}
+
+
+// Scans the content directory and then the caption directory; stops at the
+// first directory that cannot be scanned and returns false in that case.
+bool HelpIndexer::scanForFiles() {
+    return scanForFiles(d_contentDir) && scanForFiles(d_captionDir);
+}
+
+// Adds the names of all regular files directly inside `path` to d_files.
+// Returns false (with d_error set) when the directory cannot be opened.
+// Entries whose status cannot be queried are skipped.
+bool HelpIndexer::scanForFiles(OUString const & path) {
+
+    osl::Directory dir(path);
+    if (osl::FileBase::E_None != dir.open()) {
+        d_error = "Error reading directory " + path;
+        return false;
+    }
+
+    osl::DirectoryItem item;
+    osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type);
+    while (dir.getNextItem(item) == osl::FileBase::E_None) {
+        // Without this check a failed query would let data from an earlier
+        // entry, still held in fileStatus, be evaluated for this one.
+        if (item.getFileStatus(fileStatus) != osl::FileBase::E_None) {
+            continue;
+        }
+        if (fileStatus.getFileType() == osl::FileStatus::Regular) {
+            d_files.insert(fileStatus.getFileName());
+        }
+    }
+
+    return true;
+}
+
+// Fills doc with the three fields of one help file: "path" (stored,
+// untokenized, "#HLP#<module>/<fileName>"), and "caption" and "content"
+// (tokenized, not stored), which are read from the URI-encoded file name
+// below the caption and content directories.
+void HelpIndexer::helpDocument(OUString const & fileName, Document *doc) const {
+    const int nStoredUntokenized = int(Field::STORE_YES) | int(Field::INDEX_UNTOKENIZED);
+    const int nTokenizedOnly = int(Field::STORE_NO) | int(Field::INDEX_TOKENIZED);
+
+    // Add the help path as an indexed, untokenized field.
+    OUString path = "#HLP#" + d_module + "/" + fileName;
+    std::vector<TCHAR> aPath(OUStringToTCHARVec(path));
+    doc->add(*_CLNEW Field(_T("path"), aPath.data(), nStoredUntokenized));
+
+    OUString sEscapedFileName = rtl::Uri::encode(
+        fileName, rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8);
+
+    // Add the caption as a field.
+    OUString captionPath = d_captionDir + "/" + sEscapedFileName;
+    doc->add(*_CLNEW Field(_T("caption"), helpFileReader(captionPath), nTokenizedOnly));
+
+    // Add the content as a field.
+    OUString contentPath = d_contentDir + "/" + sEscapedFileName;
+    doc->add(*_CLNEW Field(_T("content"), helpFileReader(contentPath), nTokenizedOnly));
+}
+
+// Returns a newly allocated reader for the file at URL `path`: a UTF-8
+// FileReader when the file can be opened for reading, otherwise a
+// StringReader over an empty string.
+lucene::util::Reader *HelpIndexer::helpFileReader(OUString const & path) {
+    osl::File file(path);
+    if (osl::FileBase::E_None != file.open(osl_File_OpenFlag_Read)) {
+        return _CLNEW lucene::util::StringReader(L"");
+    }
+
+    // The file was opened only as a readability probe; the reader below
+    // opens it again by system path.
+    file.close();
+    OUString ustrSystemPath;
+    osl::File::getSystemPathFromFileURL(path, ustrSystemPath);
+#if defined _WIN32
+    return TryWithUnicodePathWorkaround(ustrSystemPath, [](const OString& s) {
+        return _CLNEW lucene::util::FileReader(s.getStr(), "UTF-8");
+    });
+#else
+    OString pathStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
+    return _CLNEW lucene::util::FileReader(pathStr.getStr(), "UTF-8");
+#endif
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/HelpIndexer_main.cxx b/helpcompiler/source/HelpIndexer_main.cxx
new file mode 100644
index 0000000000..3ad47bbf7a
--- /dev/null
+++ b/helpcompiler/source/HelpIndexer_main.cxx
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <helpcompiler/HelpIndexer.hxx>
+#include <osl/file.hxx>
+#include <osl/process.h>
+#include <osl/thread.h>
+#include <string>
+#include <iostream>
+
+// Command line entry point:
+//   HelpIndexer -lang ISOLangCode -mod HelpModule -dir Dir
+// Returns 0 on success, 1 on a usage error and 2 when indexing fails or a
+// std::exception is caught.
+int main(int argc, char **argv)
+{
+    try
+    {
+        const std::string aLang("-lang");
+        const std::string aModule("-mod");
+        const std::string aDir("-dir");
+
+        std::string lang;
+        std::string module;
+        std::string dir;
+
+        bool error = false;
+        for (int i = 1; i < argc; ++i) {
+            // find the variable the current option fills, if the option is known
+            std::string* pTarget = nullptr;
+            if (aLang.compare(argv[i]) == 0) {
+                pTarget = &lang;
+            } else if (aModule.compare(argv[i]) == 0) {
+                pTarget = &module;
+            } else if (aDir.compare(argv[i]) == 0) {
+                pTarget = &dir;
+            }
+
+            // an unknown option, or a known one without a value, is an error
+            if (pTarget != nullptr && i + 1 < argc) {
+                *pTarget = argv[++i];
+            } else {
+                error = true;
+            }
+        }
+
+        if (error) {
+            std::cerr << "Error parsing command-line arguments" << std::endl;
+        }
+
+        if (error || lang.empty() || module.empty() || dir.empty()) {
+            std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -dir Dir" << std::endl;
+            return 1;
+        }
+
+        // converts a command line argument using the thread text encoding
+        const auto toOUString = [](const std::string& rArg) {
+            return OUString(rArg.c_str(), rArg.size(), osl_getThreadTextEncoding());
+        };
+
+        OUString sDir;
+        osl::File::getFileURLFromSystemPath(toOUString(dir), sDir);
+
+        // resolve the directory against the process working directory
+        OUString cwd;
+        osl_getProcessWorkingDir(&cwd.pData);
+        (void)osl::File::getAbsoluteFileURL(cwd, sDir, sDir);
+
+        // the same directory serves as source and as output directory
+        HelpIndexer indexer(toOUString(lang), toOUString(module), sDir, sDir);
+
+        if (!indexer.indexDocuments()) {
+            std::cerr << indexer.getErrorMessage() << std::endl;
+            return 2;
+        }
+        return 0;
+    }
+    catch (std::exception& e)
+    {
+        std::cerr << "failure: " << e.what() << std::endl;
+        return 2;
+    }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/HelpLinker.cxx b/helpcompiler/source/HelpLinker.cxx
new file mode 100644
index 0000000000..b99061d0dd
--- /dev/null
+++ b/helpcompiler/source/HelpLinker.cxx
@@ -0,0 +1,942 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <HelpCompiler.hxx>
+#include <HelpLinker.hxx>
+
+#include <algorithm>
+#include <fstream>
+
+#include <string.h>
+
+#include <libxslt/transform.h>
+
+#include <sal/types.h>
+#include <o3tl/char16_t2wchar_t.hxx>
+#include <sal/log.hxx>
+
+#include <expat.h>
+#include <memory>
+
+namespace {
+// Opens rPath as a C stdio stream using the fopen-style mode string szMode
+// and returns the stream as delivered by fopen / _wfopen.
+FILE* fopen_impl(const fs::path& rPath, const char* szMode)
+{
+#ifndef _WIN32
+    return fopen(rPath.native_file_string().c_str(), szMode);
+#else
+    //We need _wfopen to support long file paths on Windows XP
+    const OUString aWideMode = OUString::createFromAscii(szMode);
+    return _wfopen(rPath.native_file_string_w().c_str(), o3tl::toW(aWideMode.getStr()));
+#endif
+}
+}
+
+// Creates the "caption" and "content" directories below fsIndexBaseDir and
+// parses the two XSLT stylesheets that processDocument() applies later on.
+IndexerPreProcessor::IndexerPreProcessor
+    ( const fs::path& fsIndexBaseDir,
+      const fs::path& idxCaptionStylesheet, const fs::path& idxContentStylesheet )
+{
+    // libxslt expects the file name as xmlChar string
+    const auto parseStylesheet = [](const fs::path& rStylesheet)
+    {
+        const std::string aNativePath = rStylesheet.native_file_string();
+        return xsltParseStylesheetFile(reinterpret_cast<const xmlChar *>(aNativePath.c_str()));
+    };
+
+    m_fsCaptionFilesDirName = fsIndexBaseDir / "caption";
+    fs::create_directory( m_fsCaptionFilesDirName );
+
+    m_fsContentFilesDirName = fsIndexBaseDir / "content";
+    fs::create_directory( m_fsContentFilesDirName );
+
+    m_xsltStylesheetPtrCaption = parseStylesheet( idxCaptionStylesheet );
+    m_xsltStylesheetPtrContent = parseStylesheet( idxContentStylesheet );
+}
+
+// Frees whichever of the two stylesheets were set up by the constructor.
+IndexerPreProcessor::~IndexerPreProcessor()
+{
+    for( auto pStylesheet : { m_xsltStylesheetPtrCaption, m_xsltStylesheetPtrContent } )
+    {
+        if( pStylesheet != nullptr )
+            xsltFreeStylesheet( pStylesheet );
+    }
+}
+
+// Converts the system path Path (in the thread text encoding) to a file URL
+// and returns that URL, again in the thread text encoding. The result code of
+// the conversion is not inspected.
+static std::string getEncodedPath( const std::string& Path )
+{
+    const auto eEncoding = osl_getThreadTextEncoding();
+
+    OUString aFileURL;
+    osl::File::getFileURLFromSystemPath(
+        OStringToOUString( std::string_view( Path ), eEncoding ), aFileURL );
+
+    const OString aEncodedURL( OUStringToOString( aFileURL, eEncoding ) );
+    return std::string( aEncodedURL );
+}
+
+// Applies pStylesheet to doc and, if the result document has a first child
+// node, writes that node's text content followed by a newline to rTargetFile.
+// Nothing is written when the transformation fails or the target cannot be
+// opened.
+static void writeTransformedText
+    ( xsltStylesheetPtr pStylesheet, xmlDocPtr doc, const fs::path& rTargetFile )
+{
+    xmlDocPtr pResult = xsltApplyStylesheet( pStylesheet, doc, nullptr );
+    if( pResult == nullptr )
+        return; // xsltApplyStylesheet returns NULL in case of error
+
+    xmlNodePtr pResNode = pResult->xmlChildrenNode;
+    if( pResNode )
+    {
+        FILE* pFile_docURL = fopen_impl( rTargetFile, "w" );
+        if( pFile_docURL )
+        {
+            // Nodes without text (e.g. elements) have no content pointer;
+            // passing NULL to %s would be undefined behaviour.
+            const char* pText = pResNode->content != nullptr
+                ? reinterpret_cast<const char*>(pResNode->content) : "";
+            fprintf( pFile_docURL, "%s\n", pText );
+            fclose( pFile_docURL );
+        }
+    }
+    xmlFreeDoc( pResult );
+}
+
+// Runs the caption and the content stylesheet (each only if it was loaded)
+// over doc and stores the resulting text in the caption and content
+// directories, in a file named after the URL form of EncodedDocPath.
+void IndexerPreProcessor::processDocument
+    ( xmlDocPtr doc, const std::string &EncodedDocPath )
+{
+    const std::string aStdStr_EncodedDocPathURL = getEncodedPath( EncodedDocPath );
+
+    if( m_xsltStylesheetPtrCaption )
+        writeTransformedText( m_xsltStylesheetPtrCaption, doc,
+                              m_fsCaptionFilesDirName / aStdStr_EncodedDocPathURL );
+
+    if( m_xsltStylesheetPtrContent )
+        writeTransformedText( m_xsltStylesheetPtrContent, doc,
+                              m_fsContentFilesDirName / aStdStr_EncodedDocPathURL );
+}
+
+namespace {
+
+// Ordered list of ids that can be flattened into one ';'-terminated string.
+struct Data
+{
+    std::vector<std::string> _idList;
+
+    // Adds id at the end of the list.
+    void append(const std::string &id)
+    {
+        _idList.emplace_back(id);
+    }
+
+    // Returns all ids in insertion order, each one followed by ';'.
+    std::string getString() const
+    {
+        std::string joined;
+        for (const std::string& rId : _idList)
+        {
+            joined += rId;
+            joined += ';';
+        }
+        return joined;
+    }
+};
+
+}
+
+// Appends one record to pFile in the form
+//     <key length, hex> <key> <value length, hex> <value>LF
+// with single spaces as separators. Does nothing when pFile is null. Every
+// failed write is reported on stderr; the remaining parts are still attempted.
+static void writeKeyValue_DBHelp( FILE* pFile, const std::string& aKeyStr, const std::string& aValueStr )
+{
+    if( pFile == nullptr )
+        return;
+
+    const auto check = [](bool bWritten)
+    {
+        if( !bWritten )
+            fprintf(stderr, "fwrite to db failed\n");
+    };
+
+    // Keep the full size_t width; the lengths are what a reader relies on
+    // to find the end of key and value.
+    const size_t nKeyLen = aKeyStr.length();
+    const size_t nValueLen = aValueStr.length();
+
+    check( fprintf( pFile, "%zx ", nKeyLen ) >= 0 );
+    if( nKeyLen > 0 )
+        check( fwrite( aKeyStr.c_str(), 1, nKeyLen, pFile ) == nKeyLen );
+
+    check( fprintf( pFile, " %zx ", nValueLen ) >= 0 );
+    if( nValueLen > 0 )
+        check( fwrite( aValueStr.c_str(), 1, nValueLen, pFile ) == nValueLen );
+
+    check( fputc( '\n', pFile ) != EOF );
+}
+
+namespace {
+
+// Collects, per keyword, the ids the keyword was registered with and writes
+// the collection as key/value records.
+class HelpKeyword
+{
+    std::unordered_map<std::string, Data> m_aIdsByKeyword;
+
+public:
+    // Records id for key; a new entry is created on first use of key.
+    void insert(const std::string &key, const std::string &id)
+    {
+        m_aIdsByKeyword[key].append(id);
+    }
+
+    // Writes one record per keyword (value: Data::getString()) to rFileName.
+    // Nothing happens when the file cannot be opened.
+    void dump_DBHelp( const fs::path& rFileName )
+    {
+        if( FILE* pOut = fopen_impl( rFileName, "wb" ) )
+        {
+            for (auto const& rEntry : m_aIdsByKeyword)
+                writeKeyValue_DBHelp( pOut, rEntry.first, rEntry.second.getString() );
+
+            fclose( pOut );
+        }
+    }
+};
+
+}
+
+namespace URLEncoder
+{
+    // Percent-encodes rIn: ASCII letters, ASCII digits and the characters
+    // !$&'()*+,-.=@_ are copied unchanged, every other byte is replaced by
+    // '%' followed by its two-digit upper-case hexadecimal value.
+    static std::string encode(const std::string &rIn)
+    {
+        const char * const good = "!$&'()*+,-.=@_";
+        static const char hex[17] = "0123456789ABCDEF";
+
+        std::string result;
+        result.reserve(rIn.size());
+        for (char c : rIn)
+        {
+            const unsigned char uc = static_cast<unsigned char>(c);
+            // strchr() also matches the terminating NUL of "good", so a NUL
+            // byte has to be excluded explicitly to be encoded as %00.
+            const bool bSafe = rtl::isAsciiAlphanumeric (uc)
+                || (c != '\0' && strchr (good, c) != nullptr);
+            if (bSafe)
+            {
+                result += c;
+            } else {
+                result += '%';
+                result += hex[uc >> 4];
+                result += hex[uc & 0xf];
+            }
+        }
+        return result;
+    }
+}
+
+// Writes one bookmark record to pFile_DBHelp. The key is the URL-encoded
+// thishid. The value consists of three strings, each preceded by one byte
+// holding its length: fileB (extended by '#' and anchorB when anchorB is not
+// empty), jarfileB and titleB. Only the low byte of each length is stored.
+// Nothing is written when pFile_DBHelp is null.
+void HelpLinker::addBookmark( FILE* pFile_DBHelp, std::string thishid,
+        const std::string& fileB, const std::string& anchorB,
+        const std::string& jarfileB, const std::string& titleB)
+{
+    HCDBG(std::cerr << "HelpLinker::addBookmark " << thishid << " " <<
+        fileB << " " << anchorB << " " << jarfileB << " " << titleB << std::endl);
+
+    thishid = URLEncoder::encode(thishid);
+
+    // Reduces a length to the single byte stored in front of each string.
+    const auto lengthByte = [](size_t nLength)
+    {
+        return static_cast<char>(static_cast<unsigned char>(nLength));
+    };
+
+    const std::string aLocation = anchorB.empty() ? fileB : fileB + '#' + anchorB;
+
+    std::string aValueStr;
+    aValueStr.reserve(3 + aLocation.length() + jarfileB.length() + titleB.length());
+    for (const std::string* pPart : { &aLocation, &jarfileB, &titleB })
+    {
+        aValueStr += lengthByte(pPart->length());
+        aValueStr += *pPart;
+    }
+
+    if( pFile_DBHelp != nullptr )
+        writeKeyValue_DBHelp( pFile_DBHelp, thishid, aValueStr );
+}
+
+// Replaces m_pIndexerPreProcessor with a new pre-processor that works below
+// indexDirParentName and uses the configured caption/content stylesheets.
+void HelpLinker::initIndexerPreProcessor()
+{
+    auto* pPreProcessor = new IndexerPreProcessor(
+        indexDirParentName, idxCaptionStylesheet, idxContentStylesheet );
+    m_pIndexerPreProcessor.reset( pPreProcessor );
+}
+
+// Compiles every entry of helpFiles and writes the collected help data below
+// indexDirParentName into three files named after the module (passed through
+// tocharlower): ".ht" for help texts, ".db" for bookmarks and ".key" for
+// keywords. In extension mode the extensions are ".ht_", ".db_" and ".key_".
+//
+// For each compiled file - unless m_bCreateIndex is false - bookmarks,
+// keywords and help texts are recorded and the document is handed to the
+// IndexerPreProcessor. Outside extension mode the entries of additionalFiles
+// are finally copied into the output directory.
+//
+// Exceptions raised while compiling propagate to the caller; the two output
+// streams are closed before that happens, whatever the exception type.
+void HelpLinker::link()
+{
+    // Owns a stdio stream and closes it on scope exit, so that the data base
+    // files are never left open (locked) when an exception passes through.
+    struct FileCloser
+    {
+        void operator()(FILE* pFile) const { fclose(pFile); }
+    };
+    using FilePtr = std::unique_ptr<FILE, FileCloser>;
+
+    if( bExtensionMode )
+    {
+        indexDirParentName = extensionDestination;
+    }
+    else
+    {
+        indexDirParentName = zipdir;
+        fs::create_directory(indexDirParentName);
+    }
+
+    std::string mod = module;
+    std::transform (mod.begin(), mod.end(), mod.begin(), tocharlower);
+
+    // Extension help uses the file extensions with a trailing underscore.
+    const bool bUse_ = bExtensionMode;
+
+    fs::path helpTextFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".ht_" : ".ht")));
+    FilePtr pFileHelpText_DBHelp( fopen_impl( helpTextFileName_DBHelp, "wb" ) );
+
+    fs::path dbBaseFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".db_" : ".db")));
+    FilePtr pFileDbBase_DBHelp( fopen_impl( dbBaseFileName_DBHelp, "wb" ) );
+
+    fs::path keyWordFileName_DBHelp(indexDirParentName / (mod + (bUse_ ? ".key_" : ".key")));
+
+    HelpKeyword helpKeyword;
+
+    // The pre-processor is only needed when there is something to compile.
+    if (!helpFiles.empty())
+        initIndexerPreProcessor();
+
+    for (auto const& xhpFileName : helpFiles)
+    {
+        // streamTable receives everything HelpCompiler extracts from the file
+        StreamTable streamTable;
+
+        // Proper suffix test: names shorter than the suffix never match.
+        const bool bHasXhpExtension = xhpFileName.length() >= 4
+            && xhpFileName.compare(xhpFileName.length() - 4, 4, ".xhp") == 0;
+        if (!bExtensionMode && !bHasXhpExtension)
+        {
+            // only work on .xhp - files
+            SAL_WARN("helpcompiler",
+                "ERROR: input list entry '"
+                    << xhpFileName
+                    << "' has the wrong extension (only files with extension .xhp are accepted)");
+
+            continue;
+        }
+
+        fs::path langsourceRoot(sourceRoot);
+        fs::path xhpFile;
+
+        if( bExtensionMode )
+        {
+            // langsourceRoot == sourceRoot for extensions
+            std::string xhpFileNameComplete( extensionPath );
+            xhpFileNameComplete.append( '/' + xhpFileName );
+            xhpFile = fs::path( xhpFileNameComplete );
+        }
+        else
+        {
+            langsourceRoot.append( "/" );
+            if ( m_bUseLangRoot )
+                langsourceRoot.append( lang + '/' );
+            xhpFile = fs::path(xhpFileName, fs::native);
+        }
+
+        HelpCompiler hc( streamTable, std::move(xhpFile), std::move(langsourceRoot), zipdir,
+            compactStylesheet, embeddStylesheet, module, lang, bExtensionMode );
+
+        HCDBG(std::cerr << "before compile of " << xhpFileName << std::endl);
+        hc.compile();
+        HCDBG(std::cerr << "after compile of " << xhpFileName << std::endl);
+
+        if (!m_bCreateIndex)
+            continue;
+
+        std::string documentPath = streamTable.document_path;
+        if (documentPath.compare(0, 1, "/") == 0)
+            documentPath = documentPath.substr(1);
+
+        const std::string documentJarfile = streamTable.document_module + ".jar";
+
+        std::string documentTitle = streamTable.document_title;
+        if (documentTitle.empty())
+            documentTitle = "<notitle>";
+
+        // add once this as its own id.
+        addBookmark( pFileDbBase_DBHelp.get(), documentPath, documentPath, std::string(),
+                     documentJarfile, documentTitle);
+
+        // one bookmark per help id; an id may carry its anchor after '#'
+        const std::vector<std::string> *hidlist = streamTable.appl_hidlist.get();
+        if (hidlist)
+        {
+            for (const std::string& rHid : *hidlist)
+            {
+                std::string thishid = rHid;
+
+                std::string anchorB;
+                const size_t index = thishid.rfind('#');
+                if (index != std::string::npos)
+                {
+                    anchorB = thishid.substr(1 + index);
+                    thishid.erase(index);
+                }
+                addBookmark( pFileDbBase_DBHelp.get(), thishid, documentPath, anchorB,
+                             documentJarfile, documentTitle);
+            }
+        }
+
+        // now the keywords
+        const Hashtable *anchorToLL = streamTable.appl_keywords.get();
+        if (anchorToLL && !anchorToLL->empty())
+        {
+            const std::string fakedHid = URLEncoder::encode(documentPath);
+            for (auto const& elemAnchor : *anchorToLL)
+            {
+                const std::string &anchor = elemAnchor.first;
+                addBookmark(pFileDbBase_DBHelp.get(), documentPath, documentPath,
+                            anchor, documentJarfile, documentTitle);
+
+                const std::string totalId = fakedHid + "#" + anchor;
+                const LinkedList& ll = elemAnchor.second;
+                for (auto const& elem : ll)
+                    helpKeyword.insert(elem, totalId);
+            }
+        }
+
+        // and last the helptexts
+        const Stringtable *helpTextHash = streamTable.appl_helptexts.get();
+        if (helpTextHash)
+        {
+            for (auto const& elem : *helpTextHash)
+            {
+                const std::string helpTextId = URLEncoder::encode(elem.first);
+                const std::string& helpTextText = elem.second;
+
+                if( pFileHelpText_DBHelp != nullptr )
+                    writeKeyValue_DBHelp( pFileHelpText_DBHelp.get(), helpTextId, helpTextText );
+            }
+        }
+
+        // now the indexing (IndexerPreProcessor)
+        xmlDocPtr document = streamTable.appl_doc;
+        if (document)
+            m_pIndexerPreProcessor->processDocument(document, URLEncoder::encode(documentPath) );
+    }
+
+    // Close the data bases before the keyword file is written and the
+    // additional files are copied, as the hand-written cleanup used to do.
+    pFileHelpText_DBHelp.reset();
+    pFileDbBase_DBHelp.reset();
+
+    helpKeyword.dump_DBHelp( keyWordFileName_DBHelp);
+
+    if( bExtensionMode )
+        return;
+
+    // New index
+    for (auto const& additionalFile : additionalFiles)
+    {
+        const std::string &additionalFileName = additionalFile.second;
+        const std::string &additionalFileKey = additionalFile.first;
+
+        fs::path fsAdditionalFileName( additionalFileName, fs::native );
+        HCDBG({
+                std::string aNativeStr = fsAdditionalFileName.native_file_string();
+                const char* pStr = aNativeStr.c_str();
+                std::cerr << pStr << std::endl;
+        });
+
+        fs::path fsTargetName( indexDirParentName / additionalFileKey );
+
+        fs::copy( fsAdditionalFileName, fsTargetName );
+    }
+}
+
+
+// Parses the helplinker arguments, validates them and runs link().
+//
+// args             option list; if its first entry starts with '@', the rest
+//                  of that entry names a file whose whitespace-separated
+//                  tokens replace args
+// pExtensionPath   extension source path when called from the extension
+//                  manager (extension mode without -extlangsrc)
+// pDestination     output location for that case; required whenever
+//                  pExtensionPath and pOfficeHelpPath select that mode
+// pOfficeHelpPath  location of idxcaption.xsl and idxcontent.xsl for that case
+//
+// Throws HelpProcessingException (class General) when an option lacks its
+// value, a mandatory setting is missing or options contradict each other.
+void HelpLinker::main( std::vector<std::string> &args,
+                       std::string const * pExtensionPath, std::string const * pDestination,
+                       const OUString* pOfficeHelpPath )
+{
+    // Every argument error is reported as a General exception whose message
+    // is terminated by a newline.
+    const auto fail = [](const char* pMessage)
+    {
+        throw HelpProcessingException( HelpProcessingErrorClass::General,
+                                       std::string(pMessage) + "\n" );
+    };
+
+    bExtensionMode = false;
+    helpFiles.clear();
+
+    if ((!args.empty()) && args[0][0] == '@')
+    {
+        // response file: each whitespace-separated token is one argument
+        std::vector<std::string> stringList;
+        std::ifstream fileReader(args[0].substr(1).c_str());
+
+        std::string token;
+        while (fileReader >> token)
+            stringList.push_back(token);
+        fileReader.close();
+
+        args = std::move(stringList);
+    }
+
+    size_t i = 0;
+    bool bSrcOption = false;
+
+    // Steps to the value that has to follow the current option and returns
+    // it; pMissing is the error text used when the option is the last entry.
+    const auto nextValue = [&args, &i, &fail](const char* pMissing) -> const std::string&
+    {
+        ++i;
+        if (i >= args.size())
+            fail(pMissing);
+        return args[i];
+    };
+
+    while (i < args.size())
+    {
+        const std::string& rOption = args[i];
+
+        if (rOption == "-extlangsrc")
+            extsource = nextValue("extension source missing");
+        else if (rOption == "-extlangdest")
+            // mandatory whenever -extlangsrc is used, see below
+            extdestination = nextValue("extension destination missing");
+        else if (rOption == "-src")
+        {
+            sourceRoot = fs::path(nextValue("sourceroot missing"), fs::native);
+            bSrcOption = true;
+        }
+        else if (rOption == "-compact")
+            compactStylesheet = fs::path(nextValue("compactStylesheet missing"), fs::native);
+        else if (rOption == "-sty")
+            embeddStylesheet = fs::path(nextValue("embeddingStylesheet missing"), fs::native);
+        else if (rOption == "-zipdir")
+            zipdir = fs::path(nextValue("idxtemp missing"), fs::native);
+        else if (rOption == "-idxcaption")
+            idxCaptionStylesheet = fs::path(nextValue("idxcaption stylesheet missing"), fs::native);
+        else if (rOption == "-idxcontent")
+            idxContentStylesheet = fs::path(nextValue("idxcontent stylesheet missing"), fs::native);
+        else if (rOption == "-o")
+            outputFile = fs::path(nextValue("outputfilename missing"), fs::native);
+        else if (rOption == "-mod")
+            module = nextValue("module name missing");
+        else if (rOption == "-lang")
+            lang = nextValue("language name missing");
+        else if (rOption == "-hid")
+            throw HelpProcessingException( HelpProcessingErrorClass::General, "obsolete -hid argument used" );
+        else if (rOption == "-add")
+        {
+            // -add <name below the output directory> <file to copy>
+            const std::string addFileUnderPath = nextValue("pathname missing");
+            const std::string addFile = nextValue("pathname missing");
+            if (!addFileUnderPath.empty() && !addFile.empty())
+                additionalFiles[addFileUnderPath] = addFile;
+        }
+        else if (rOption == "-nolangroot")
+            m_bUseLangRoot = false;
+        else if (rOption == "-noindex")
+            m_bCreateIndex = false;
+        else
+            helpFiles.push_back(rOption);
+        ++i;
+    }
+
+    //We can be called from the helplinker executable or the extension manager
+    //In the latter case extsource is not used.
+    if( (pExtensionPath && pExtensionPath->length() > 0 && pOfficeHelpPath)
+        || !extsource.empty())
+    {
+        bExtensionMode = true;
+        if (!extsource.empty())
+        {
+            //called from helplinker.exe, pExtensionPath and pOfficeHelpPath
+            //should be NULL
+            sourceRoot = fs::path(extsource, fs::native);
+            extensionPath = sourceRoot.toUTF8();
+
+            if (extdestination.empty())
+                fail("-extlangdest is missing");
+
+            //Convert from system path to file URL!!!
+            fs::path p(extdestination, fs::native);
+            extensionDestination = p.toUTF8();
+        }
+        else
+        { //called from extension manager
+            // The destination is dereferenced below; report a missing one
+            // like any other missing argument instead of crashing.
+            if (pDestination == nullptr)
+                fail("extension destination missing");
+            extensionPath = *pExtensionPath;
+            sourceRoot = fs::path(extensionPath);
+            extensionDestination = *pDestination;
+        }
+        //check if -src option was used. This option must not be used
+        //when extension help is compiled.
+        if (bSrcOption)
+            fail("-src must not be used together with -extsource missing");
+    }
+
+    if (!bExtensionMode && zipdir.empty())
+        fail("no index dir given");
+
+    // Extension mode without -extlangsrc means compileExtensionHelp was
+    // called from the extension manager; the index stylesheets are then
+    // taken from the office help path. In every other case they have to be
+    // given with -idxcaption / -idxcontent.
+    const bool bCalledFromExtensionManager = bExtensionMode && extsource.empty();
+    const auto toPath = [](const OUString& rFileURL)
+    {
+        const OString aOStr_FileURL( OUStringToOString( rFileURL, osl_getThreadTextEncoding() ) );
+        const std::string aStdStr_FileURL( aOStr_FileURL );
+        return fs::path( aStdStr_FileURL );
+    };
+
+    if (bCalledFromExtensionManager)
+    {
+        const OUString aIdxCaptionPathFileURL = *pOfficeHelpPath + "/idxcaption.xsl";
+        idxCaptionStylesheet = toPath( aIdxCaptionPathFileURL );
+    }
+    else if (idxCaptionStylesheet.empty())
+        fail("no index caption stylesheet given");
+
+    if (bCalledFromExtensionManager)
+    {
+        const OUString aIdxContentPathFileURL = *pOfficeHelpPath + "/idxcontent.xsl";
+        idxContentStylesheet = toPath( aIdxContentPathFileURL );
+    }
+    else if (idxContentStylesheet.empty())
+        fail("no index content stylesheet given");
+
+    if (!bExtensionMode && embeddStylesheet.empty())
+        fail("no embedding resolving file given");
+    if (sourceRoot.empty())
+        fail("no sourceroot given");
+    if (!bExtensionMode && outputFile.empty())
+        fail("no output file given");
+    if (module.empty())
+        fail("module missing");
+    if (!bExtensionMode && lang.empty())
+        fail("language missing");
+
+    link();
+}
+
+// Variable to set an exception in "C" StructuredXMLErrorFunction.
+// Owned pointer: whoever consumes it has to delete it and reset it to null.
+static const HelpProcessingException* GpXMLParsingException = nullptr;
+
+extern "C" {
+
+// libxml2 structured error callback: stores message, file and line of the
+// reported error in GpXMLParsingException and then uninstalls itself.
+#if LIBXML_VERSION >= 21200
+static void StructuredXMLErrorFunction(SAL_UNUSED_PARAMETER void *, const xmlError* error)
+#else
+static void StructuredXMLErrorFunction(SAL_UNUSED_PARAMETER void *, xmlErrorPtr error)
+#endif
+{
+    // message and file may both be absent from the error record
+    const char* pMessage = error->message != nullptr ? error->message : "";
+    std::string aXMLParsingFile;
+    if( error->file != nullptr )
+        aXMLParsingFile = error->file;
+    int nXMLParsingLine = error->line;
+
+    // Do not leak an exception that was stored earlier but never consumed.
+    delete GpXMLParsingException;
+    GpXMLParsingException = new HelpProcessingException(pMessage, aXMLParsingFile, nXMLParsingLine);
+
+    // Reset error handler
+    xmlSetStructuredErrorFunc( nullptr, nullptr );
+}
+
+}
+
+HelpProcessingErrorInfo& HelpProcessingErrorInfo::operator=( const struct HelpProcessingException& e )
+{
+ m_eErrorClass = e.m_eErrorClass;
+ m_aErrorMsg = OStringToOUString( std::string_view(e.m_aErrorMsg), osl_getThreadTextEncoding() );
+ m_aXMLParsingFile = OStringToOUString( std::string_view(e.m_aXMLParsingFile), osl_getThreadTextEncoding() );
+ m_nXMLParsingLine = e.m_nXMLParsingLine;
+ return *this;
+}
+
+
+// Compiles the help of one extension and checks its help.tree file.
+//
+// aOfficeHelpPath         location of the index stylesheets, passed to HelpLinker::main
+// aExtensionName          passed to HelpLinker as -mod value
+// aExtensionLanguageRoot  extension path for HelpLinker; help.tree is looked up below it
+// nXhpFileCount/pXhpFiles the help files to compile
+// aDestination            destination passed to HelpLinker::main
+// o_rHelpProcessingErrorInfo  receives the error details on failure
+//
+// Returns true in case of success, false in case of error
+bool compileExtensionHelp
+(
+    const OUString& aOfficeHelpPath,
+    std::u16string_view aExtensionName,
+    std::u16string_view aExtensionLanguageRoot,
+    sal_Int32 nXhpFileCount, const OUString* pXhpFiles,
+    std::u16string_view aDestination,
+    HelpProcessingErrorInfo& o_rHelpProcessingErrorInfo
+)
+{
+    bool bSuccess = true;
+
+    std::vector<std::string> args;
+    args.reserve(nXhpFileCount + 2);
+    args.push_back(std::string("-mod"));
+    OString aOExtensionName = OUStringToOString( aExtensionName, osl_getThreadTextEncoding() );
+    args.push_back(std::string(aOExtensionName));
+
+    for( sal_Int32 iXhp = 0 ; iXhp < nXhpFileCount ; ++iXhp )
+    {
+        OString aOXhpFile = OUStringToOString( pXhpFiles[iXhp], osl_getThreadTextEncoding() );
+        args.push_back(std::string(aOXhpFile));
+    }
+
+    OString aOExtensionLanguageRoot = OUStringToOString( aExtensionLanguageRoot, osl_getThreadTextEncoding() );
+    std::string aStdStrExtensionPath = aOExtensionLanguageRoot.getStr();
+    OString aODestination = OUStringToOString(aDestination, osl_getThreadTextEncoding());
+    std::string aStdStrDestination = aODestination.getStr();
+
+    // Set error handler
+    xmlSetStructuredErrorFunc( nullptr, StructuredXMLErrorFunction );
+    try
+    {
+        HelpLinker aHelpLinker;
+        aHelpLinker.main( args, &aStdStrExtensionPath, &aStdStrDestination, &aOfficeHelpPath );
+    }
+    catch( const HelpProcessingException& e )
+    {
+        // Prefer the details recorded by the XML error handler, if any.
+        if( GpXMLParsingException != nullptr )
+            o_rHelpProcessingErrorInfo = *GpXMLParsingException;
+        else
+            o_rHelpProcessingErrorInfo = e;
+        bSuccess = false;
+    }
+    // Reset error handler
+    xmlSetStructuredErrorFunc( nullptr, nullptr );
+
+    // Release the recorded XML error in every case, so that it neither leaks
+    // nor gets attributed to a later compilation.
+    delete GpXMLParsingException;
+    GpXMLParsingException = nullptr;
+
+    // i83624: Tree files
+    // The following basically checks if the help.tree is well formed XML.
+    // Apparently there have been cases when translations contained
+    // non-well-formed XML in the past.
+    OUString aTreeFileURL = OUString::Concat(aExtensionLanguageRoot) + "/help.tree";
+    osl::DirectoryItem aTreeFileItem;
+    osl::FileBase::RC rcGet = osl::DirectoryItem::get( aTreeFileURL, aTreeFileItem );
+    osl::FileStatus aFileStatus( osl_FileStatus_Mask_FileSize );
+    if( rcGet == osl::FileBase::E_None &&
+        aTreeFileItem.getFileStatus( aFileStatus ) == osl::FileBase::E_None &&
+        aFileStatus.isValid( osl_FileStatus_Mask_FileSize ) )
+    {
+        const sal_uInt64 len = aFileStatus.getFileSize();
+        sal_uInt64 ret = 0;
+        std::unique_ptr<char[]> s(new char[ int(len) ]); // the buffer to hold the installed files
+        osl::File aFile( aTreeFileURL );
+        // Without a successful read there is nothing meaningful to parse;
+        // the check is then skipped just like for a missing help.tree.
+        const bool bRead = aFile.open( osl_File_OpenFlag_Read ) == osl::FileBase::E_None
+            && aFile.read( s.get(), len, ret ) == osl::FileBase::E_None;
+        aFile.close();
+
+        XML_Parser parser = bRead ? XML_ParserCreate( nullptr ) : nullptr;
+        if( parser != nullptr )
+        {
+            // Parse exactly the bytes that were read.
+            XML_Status parsed = XML_Parse( parser, s.get(), int( ret ), true );
+
+            if (XML_STATUS_ERROR == parsed)
+            {
+                XML_Error nError = XML_GetErrorCode( parser );
+                o_rHelpProcessingErrorInfo.m_eErrorClass = HelpProcessingErrorClass::XmlParsing;
+                o_rHelpProcessingErrorInfo.m_aErrorMsg = OUString::createFromAscii( XML_ErrorString( nError ) );
+                o_rHelpProcessingErrorInfo.m_aXMLParsingFile = aTreeFileURL;
+                // CRASHES!!! o_rHelpProcessingErrorInfo.m_nXMLParsingLine = XML_GetCurrentLineNumber( parser );
+                bSuccess = false;
+            }
+
+            XML_ParserFree( parser );
+        }
+    }
+
+    return bSuccess;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/HelpLinker_main.cxx b/helpcompiler/source/HelpLinker_main.cxx
new file mode 100644
index 0000000000..70ff1f6523
--- /dev/null
+++ b/helpcompiler/source/HelpLinker_main.cxx
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <HelpCompiler.hxx>
+#include <HelpLinker.hxx>
+#include <sal/main.h>
+#include <iostream>
+
+// helplinker executable: forwards the command-line arguments to
+// HelpLinker::main. On an exception the message is printed to stderr and the
+// process exits with status 1; otherwise 0 is returned.
+SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
+{
+    // argv[0] is not forwarded
+    std::vector<std::string> args;
+    if (argc > 1)
+        args.assign(argv + 1, argv + argc);
+
+    const auto abortWith = [](const auto& rText)
+    {
+        std::cerr << rText;
+        exit(1);
+    };
+
+    try
+    {
+        HelpLinker aHelpLinker;
+        aHelpLinker.main(args);
+    }
+    catch (const HelpProcessingException& e)
+    {
+        abortWith(e.m_aErrorMsg);
+    }
+    catch (const std::exception& e)
+    {
+        abortWith(e.what());
+    }
+    return 0;
+}
diff --git a/helpcompiler/source/HelpSearch.cxx b/helpcompiler/source/HelpSearch.cxx
new file mode 100644
index 0000000000..982e672c45
--- /dev/null
+++ b/helpcompiler/source/HelpSearch.cxx
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <helpcompiler/HelpSearch.hxx>
+#include <osl/file.hxx>
+#include <osl/thread.hxx>
+
+#include "LuceneHelper.hxx"
+#include <CLucene.h>
+
+// Stores the index location: indexDir is a file URL; it is converted to a
+// system path and kept in the thread text encoding for use by query().
+HelpSearch::HelpSearch(OUString const &indexDir)
+{
+    const auto eEncoding = osl_getThreadTextEncoding();
+
+    OUString aNativeIndexDir;
+    osl::File::getSystemPathFromFileURL(indexDir, aNativeIndexDir);
+    d_indexDir = OUStringToOString(aNativeIndexDir, eEncoding);
+}
+
+// Searches the index in d_indexDir for queryStr.
+//
+// queryStr     the term to look for; a trailing '*' makes it a wildcard query
+// captionOnly  search the "caption" field instead of the "content" field
+// rDocuments   receives the "path" value of every hit (empty string if the
+//              hit has none); entries are appended
+// rScores      receives the score of every hit, parallel to rDocuments
+void HelpSearch::query(OUString const &queryStr, bool captionOnly,
+    std::vector<OUString> &rDocuments, std::vector<float> &rScores) {
+
+    lucene::index::IndexReader *reader = lucene::index::IndexReader::open(d_indexDir.getStr());
+    lucene::search::IndexSearcher searcher(reader);
+
+    const TCHAR* field = captionOnly ? L"caption" : L"content";
+
+    // An empty query has no last character; treat it as a plain term.
+    const bool isWildcard = !queryStr.isEmpty()
+        && queryStr[queryStr.getLength() - 1] == L'*';
+    std::vector<TCHAR> aQueryStr(OUStringToTCHARVec(queryStr));
+    lucene::search::Query *pQuery;
+    if (isWildcard)
+        pQuery = _CLNEW lucene::search::WildcardQuery(_CLNEW lucene::index::Term(field, aQueryStr.data()));
+    else
+        pQuery = _CLNEW lucene::search::TermQuery(_CLNEW lucene::index::Term(field, aQueryStr.data()));
+
+    lucene::search::Hits *hits = searcher.search(pQuery);
+    for (size_t i = 0; i < hits->length(); ++i) {
+        lucene::document::Document &doc = hits->doc(i); // Document* belongs to Hits.
+        wchar_t const *path = doc.get(L"path");
+        rDocuments.push_back(TCHARArrayToOUString(path != nullptr ? path : L""));
+        rScores.push_back(hits->score(i));
+    }
+
+    _CLDELETE(hits);
+    _CLDELETE(pQuery);
+
+    reader->close();
+    _CLDELETE(reader);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/LuceneHelper.cxx b/helpcompiler/source/LuceneHelper.cxx
new file mode 100644
index 0000000000..d6a2de2523
--- /dev/null
+++ b/helpcompiler/source/LuceneHelper.cxx
@@ -0,0 +1,39 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "LuceneHelper.hxx"
+
+/// Converts @p rStr into a zero-terminated TCHAR buffer.
+///
+/// When TCHAR has the same size as sal_Unicode the UTF-16 code units are
+/// copied as they are; otherwise the string is expanded to one element per
+/// code point (UTF-32). In both cases the last element is 0.
+std::vector<TCHAR> OUStringToTCHARVec(OUString const& rStr)
+{
+    if (sizeof(TCHAR) != sizeof(sal_Unicode))
+    {
+        // UTF-32: walk the string one code point at a time
+        std::vector<TCHAR> aCodePoints;
+        sal_Int32 nPos = 0;
+        while (nPos < rStr.getLength())
+        {
+            const sal_uInt32 nCodePoint = rStr.iterateCodePoints(&nPos);
+            aCodePoints.push_back(nCodePoint);
+        }
+        aCodePoints.push_back(0);
+        return aCodePoints;
+    }
+
+    // UTF-16: copy the code units plus the terminating zero
+    sal_Unicode const* const pFirst = rStr.getStr();
+    sal_Unicode const* const pPastTerminator = pFirst + rStr.getLength() + 1;
+    return std::vector<TCHAR>(pFirst, pPastTerminator);
+}
+
+/// Builds an OUString from the zero-terminated TCHAR string @p str.
+///
+/// When TCHAR has the same size as sal_Unicode the data is taken as UTF-16;
+/// otherwise it is taken as UTF-32 code points, counted with wcslen().
+/// No null check is performed on @p str.
+OUString TCHARArrayToOUString(TCHAR const* str)
+{
+    if (sizeof(TCHAR) != sizeof(sal_Unicode))
+    {
+        // UTF-32
+        auto const pCodePoints = reinterpret_cast<const sal_uInt32*>(str);
+        return OUString(pCodePoints, wcslen(str));
+    }
+
+    // UTF-16
+    auto const pCodeUnits = reinterpret_cast<const sal_Unicode*>(str);
+    return OUString(pCodeUnits);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/LuceneHelper.hxx b/helpcompiler/source/LuceneHelper.hxx
new file mode 100644
index 0000000000..6e8208b1b2
--- /dev/null
+++ b/helpcompiler/source/LuceneHelper.hxx
@@ -0,0 +1,27 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+#pragma once
+
+#if defined(__GNUC__)
+#pragma GCC visibility push(default)
+#endif
+
+#include <CLucene.h>
+
+#if defined(__GNUC__)
+#pragma GCC visibility pop
+#endif
+
+#include <rtl/ustring.hxx>
+#include <vector>
+
+/// Converts rStr into a zero-terminated TCHAR vector: a plain copy of the
+/// UTF-16 code units when TCHAR and sal_Unicode have the same size, one
+/// element per code point (UTF-32) otherwise.
+std::vector<TCHAR> OUStringToTCHARVec(OUString const& rStr);
+/// Converts the zero-terminated TCHAR string str (UTF-16 or UTF-32, depending
+/// on the size of TCHAR) into an OUString. str is not checked for null.
+OUString TCHARArrayToOUString(TCHAR const* str);
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */