diff options
Diffstat (limited to '')
52 files changed, 8308 insertions, 0 deletions
diff --git a/l10ntools/Executable_cfgex.mk b/l10ntools/Executable_cfgex.mk new file mode 100644 index 0000000000..b7441792c3 --- /dev/null +++ b/l10ntools/Executable_cfgex.mk @@ -0,0 +1,38 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Executable_Executable,cfgex)) + +$(eval $(call gb_Executable_set_include,cfgex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,cfgex,\ + sal \ +)) + +$(eval $(call gb_Executable_add_scanners,cfgex,\ + l10ntools/source/cfglex \ +)) + +$(eval $(call gb_Executable_use_static_libraries,cfgex,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,cfgex,\ + l10ntools/source/cfgmerge \ +)) + +$(eval $(call gb_Executable_use_externals,cfgex,\ + boost_headers \ + libxml2 \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_helpex.mk b/l10ntools/Executable_helpex.mk new file mode 100644 index 0000000000..c7f5cce1c0 --- /dev/null +++ b/l10ntools/Executable_helpex.mk @@ -0,0 +1,40 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Executable_Executable,helpex)) + +$(eval $(call gb_Executable_set_include,helpex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,helpex,\ + sal \ +)) + +$(eval $(call gb_Executable_use_externals,helpex,\ + expat \ + boost_headers \ + icu_headers \ + libxml2 \ + icuuc \ + icui18n \ +)) + +$(eval $(call gb_Executable_use_static_libraries,helpex,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,helpex,\ + l10ntools/source/helpex \ + l10ntools/source/xmlparse \ + l10ntools/source/helpmerge \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_idxdict.mk b/l10ntools/Executable_idxdict.mk new file mode 100644 index 0000000000..bdc4353ca0 --- /dev/null +++ b/l10ntools/Executable_idxdict.mk @@ -0,0 +1,16 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Executable_Executable,idxdict)) + +$(eval $(call gb_Executable_add_exception_objects,idxdict,\ + l10ntools/source/idxdict/idxdict \ +)) + +# vim: set shiftwidth=4 tabstop=4 noexpandtab: diff --git a/l10ntools/Executable_localize.mk b/l10ntools/Executable_localize.mk new file mode 100644 index 0000000000..975725ae8a --- /dev/null +++ b/l10ntools/Executable_localize.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Executable_Executable,localize)) + +$(eval $(call gb_Executable_set_include,localize,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,localize,\ + sal \ +)) + +$(eval $(call gb_Executable_use_static_libraries,localize,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,localize,\ + l10ntools/source/localize \ +)) + +$(eval $(call gb_Executable_use_externals,localize,\ + boost_headers \ + libxml2 \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_pocheck.mk b/l10ntools/Executable_pocheck.mk new file mode 100644 index 0000000000..2619ac6f4b --- /dev/null +++ b/l10ntools/Executable_pocheck.mk @@ -0,0 +1,34 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Executable_Executable,pocheck)) + +$(eval $(call gb_Executable_set_include,pocheck,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,pocheck,\ + sal \ +)) + +$(eval $(call gb_Executable_use_static_libraries,pocheck,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,pocheck,\ + l10ntools/source/pocheck \ +)) + +$(eval $(call gb_Executable_use_externals,pocheck,\ + boost_headers \ + libxml2 \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_propex.mk b/l10ntools/Executable_propex.mk new file mode 100644 index 0000000000..c359061fd0 --- /dev/null +++ b/l10ntools/Executable_propex.mk @@ -0,0 +1,36 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + + +$(eval $(call gb_Executable_Executable,propex)) + +$(eval $(call gb_Executable_set_include,propex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,propex,\ + sal \ +)) + +$(eval $(call gb_Executable_use_static_libraries,propex,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,propex,\ + l10ntools/source/propmerge \ + l10ntools/source/propex \ +)) + +$(eval $(call gb_Executable_use_externals,propex,\ + boost_headers \ + libxml2 \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/l10ntools/Executable_treex.mk b/l10ntools/Executable_treex.mk new file mode 100644 index 0000000000..976dc13987 --- /dev/null +++ b/l10ntools/Executable_treex.mk @@ -0,0 +1,36 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + + +$(eval $(call gb_Executable_Executable,treex)) + +$(eval $(call gb_Executable_set_include,treex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,treex,\ + sal \ +)) + +$(eval $(call gb_Executable_use_static_libraries,treex,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,treex,\ + l10ntools/source/treemerge \ + l10ntools/source/treex \ +)) + +$(eval $(call gb_Executable_use_externals,treex,\ + boost_headers \ + libxml2 \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/l10ntools/Executable_ulfex.mk b/l10ntools/Executable_ulfex.mk new file mode 100644 index 0000000000..dd4c3181b9 --- /dev/null +++ b/l10ntools/Executable_ulfex.mk @@ -0,0 +1,35 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Executable_Executable,ulfex)) + +$(eval $(call gb_Executable_set_include,ulfex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,ulfex,\ + sal \ +)) + +$(eval $(call gb_Executable_use_static_libraries,ulfex,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,ulfex,\ + l10ntools/source/lngmerge \ + l10ntools/source/lngex \ +)) + +$(eval $(call gb_Executable_use_externals,ulfex,\ + boost_headers \ + libxml2 \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/Executable_xrmex.mk b/l10ntools/Executable_xrmex.mk new file mode 100644 index 0000000000..109198dc2e --- /dev/null +++ b/l10ntools/Executable_xrmex.mk @@ -0,0 +1,41 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Executable_Executable,xrmex)) + +$(eval $(call gb_Executable_set_include,xrmex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Executable_use_libraries,xrmex,\ + sal \ +)) + +$(eval $(call gb_Executable_add_scanners,xrmex,\ + l10ntools/source/xrmlex \ +)) + +$(eval $(call gb_Executable_use_static_libraries,xrmex,\ + transex \ +)) + +$(eval $(call gb_Executable_add_exception_objects,xrmex,\ + l10ntools/source/xrmmerge \ +)) + +$(eval $(call gb_Executable_use_externals,xrmex,\ + boost_headers \ + libxml2 \ + icuuc \ + icui18n \ + icu_headers \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/IwyuFilter_l10ntools.yaml b/l10ntools/IwyuFilter_l10ntools.yaml new file mode 100644 index 0000000000..171e54ae6b --- /dev/null +++ b/l10ntools/IwyuFilter_l10ntools.yaml @@ -0,0 +1,9 @@ +--- +assumeFilename: l10ntools/source/helpmerge.cxx +excludelist: + l10ntools/source/idxdict/idxdict.cxx: + # Actually needed + - fstream + l10ntools/source/xmlparse.cxx: + # Actually needed + - fstream diff --git a/l10ntools/Makefile b/l10ntools/Makefile new file mode 100644 index 0000000000..0997e62848 --- /dev/null +++ b/l10ntools/Makefile @@ -0,0 +1,14 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/l10ntools/Module_l10ntools.mk b/l10ntools/Module_l10ntools.mk new file mode 100644 index 0000000000..3806c7d6fe --- /dev/null +++ b/l10ntools/Module_l10ntools.mk @@ -0,0 +1,25 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_Module_Module,l10ntools)) + +$(eval $(call gb_Module_add_targets_for_build,l10ntools,\ + Executable_helpex \ + Executable_idxdict \ + Executable_ulfex \ + Executable_cfgex \ + Executable_xrmex \ + Executable_localize \ + Executable_pocheck \ + Executable_propex \ + Executable_treex \ + StaticLibrary_transex \ +)) + +# vim:set noet sw=4 ts=4: diff --git a/l10ntools/README.md b/l10ntools/README.md new file mode 100644 index 0000000000..b55377cf4a --- /dev/null +++ b/l10ntools/README.md @@ -0,0 +1,5 @@ +# Localization (l10n) Tools + +`l10ntools` (l10n = localization) contains a number of tools that extract +translatable content from source code and merge translations back to +source code during the build. diff --git a/l10ntools/StaticLibrary_transex.mk b/l10ntools/StaticLibrary_transex.mk new file mode 100644 index 0000000000..57285da944 --- /dev/null +++ b/l10ntools/StaticLibrary_transex.mk @@ -0,0 +1,30 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_StaticLibrary_StaticLibrary,transex)) + +$(eval $(call gb_StaticLibrary_set_include,transex,\ + -I$(SRCDIR)/l10ntools/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_StaticLibrary_use_externals,transex,\ + boost_headers \ + libxml2 \ +)) + +$(eval $(call gb_StaticLibrary_add_exception_objects,transex,\ + l10ntools/source/helper \ + l10ntools/source/common \ + l10ntools/source/merge \ + l10ntools/source/po \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/l10ntools/inc/cfglex.hxx b/l10ntools/inc/cfglex.hxx new file mode 100644 index 0000000000..af0324f642 --- /dev/null +++ b/l10ntools/inc/cfglex.hxx @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_L10NTOOLS_INC_CFGLEX_HXX +#define INCLUDED_L10NTOOLS_INC_CFGLEX_HXX + +#include <sal/config.h> + +#include <stdio.h> + +extern "C" void workOnTokenSet(int, char*); +extern "C" FILE* init(int, char**); + +void yyerror(char const*); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/cfgmerge.hxx b/l10ntools/inc/cfgmerge.hxx new file mode 100644 index 0000000000..a2f59e1e6e --- /dev/null +++ b/l10ntools/inc/cfgmerge.hxx @@ -0,0 +1,182 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_L10NTOOLS_INC_CFGMERGE_HXX +#define INCLUDED_L10NTOOLS_INC_CFGMERGE_HXX + +#include <sal/config.h> + +#include <fstream> +#include <unordered_map> +#include <memory> +#include <utility> +#include <vector> +#include "po.hxx" +#include "export.hxx" + +typedef std::unordered_map<OString, OString> OStringHashMap; + + + + +class CfgStackData +{ +friend class CfgParser; +friend class CfgExport; +friend class CfgMerge; +private: + OString sTagType; + OString sIdentifier; + + OString sResTyp; + + OString sTextTag; + OString sEndTextTag; + + OStringHashMap sText; +public: + CfgStackData(OString _sTag, OString _sId) + : sTagType(std::move( _sTag )), sIdentifier(std::move( _sId )) + {} + + const OString &GetTagType() const { return sTagType; } + const OString &GetIdentifier() const { return sIdentifier; } + +}; + + + + +class CfgStack +{ +private: + std::vector< CfgStackData* > maList; + +public: + CfgStack() {} + ~CfgStack(); + + CfgStackData *Push(const OString &rTag, const OString &rId); + void Pop() + { + if (!maList.empty()) + { + delete maList.back(); + maList.pop_back(); + } + } + + CfgStackData *GetStackData(); + + OString GetAccessPath( size_t nPos ); + + size_t size() const { return maList.size(); } +}; + +/// Parser for *.xcu files +class CfgParser +{ +protected: + OString sCurrentResTyp; + OString sCurrentIsoLang; + OString sCurrentText; + + OString sLastWhitespace; + + CfgStack aStack; + CfgStackData *pStackData; + + bool bLocalize; + + virtual void WorkOnText( + OString &rText, + const OString &rLangIndex )=0; + + virtual void WorkOnResourceEnd()=0; + + virtual void Output(const OString & rOutput)=0; + +private: + void ExecuteAnalyzedToken( int nToken, char *pToken ); + void AddText( + OString &rText, + const OString &rIsoLang, + const OString &rResTyp ); + + static bool IsTokenClosed(std::string_view rToken); + +public: + CfgParser(); + virtual ~CfgParser(); + + void Execute( int nToken, char * pToken ); +}; + +/// Export strings from 
*.xcu files +class CfgExport : public CfgParser +{ +private: + OString sPath; + PoOfstream pOutputStream; + +protected: + virtual void WorkOnText( + OString &rText, + const OString &rIsoLang + ) override; + + void WorkOnResourceEnd() override; + void Output(const OString& rOutput) override; +public: + CfgExport( + const OString &rOutputFile, + OString sFilePath + ); + virtual ~CfgExport() override; +}; + +/// Merge strings to *.xcu files +class CfgMerge : public CfgParser +{ +private: + std::unique_ptr<MergeDataFile> pMergeDataFile; + std::vector<OString> aLanguages; + std::unique_ptr<ResData> pResData; + + OString sFilename; + bool bEnglish; + + std::ofstream pOutputStream; + +protected: + virtual void WorkOnText(OString &rText, const OString &rLangIndex) override; + + void WorkOnResourceEnd() override; + + void Output(const OString& rOutput) override; +public: + CfgMerge( + const OString &rMergeSource, const OString &rOutputFile, + OString sFilename, const OString &rLanguage ); + virtual ~CfgMerge() override; +}; + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/common.hxx b/l10ntools/inc/common.hxx new file mode 100644 index 0000000000..ec5ed0c195 --- /dev/null +++ b/l10ntools/inc/common.hxx @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +// Methods used by all of the executables + +#ifndef INCLUDED_L10NTOOLS_INC_COMMON_HXX +#define INCLUDED_L10NTOOLS_INC_COMMON_HXX + +#include <sal/config.h> + +#include <string_view> + +#include <rtl/string.hxx> +#include "po.hxx" + +namespace common { + +/// Result type of handleArguments() +struct HandledArgs +{ + OString m_sInputFile; + OString m_sOutputFile; + OString m_sMergeSrc; + OString m_sLanguage; + bool m_bMergeMode; + bool m_bUTF8BOM; + HandledArgs() + : m_sInputFile( OString() ) + , m_sOutputFile( OString() ) + , m_sMergeSrc( OString() ) + , m_sLanguage( OString() ) + , m_bMergeMode( false ) + , m_bUTF8BOM( false ) + {} +}; + +/// Handle command line parameters +bool handleArguments(int argc, char * argv[], HandledArgs& o_aHandledArgs); + +/// Write out a usage help message +void writeUsage(const OString& rName, const OString& rFileType); + +/// Write out a PoEntry with attention to exceptions +void writePoEntry( + const OString& rExecutable, PoOfstream& rPoStream, const OString& rSourceFile, + std::string_view rResType, const OString& rGroupId, const OString& rLocalId, + const OString& rHelpText, const OString& rText, const PoEntry::TYPE eType = PoEntry::TTEXT ); + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/export.hxx b/l10ntools/inc/export.hxx new file mode 100644 index 0000000000..251d059ac2 --- /dev/null +++ b/l10ntools/inc/export.hxx @@ -0,0 +1,144 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_EXPORT_HXX +#define INCLUDED_L10NTOOLS_INC_EXPORT_HXX + +#include <sal/config.h> + +#include <rtl/string.hxx> + +#include <set> +#include <unordered_map> +#include <memory> +#include <vector> +#include <string_view> + +#ifdef _WIN32 +#include <direct.h> +#endif + +#define NO_TRANSLATE_ISO "x-no-translate" + +class MergeEntrys; + +typedef std::unordered_map<OString, OString> + OStringHashMap; + +typedef std::unordered_map<OString, bool> + OStringBoolHashMap; + +#define SOURCE_LANGUAGE "en-US" +#define X_COMMENT "x-comment" + + + + +/// Purpose: holds mandatory data to export a single res +class ResData +{ +public: + ResData( OString rGId ); + ResData( OString sGId , OString sFilename ); + + OString sResTyp; + OString sId; + OString sGId; + OString sFilename; + + OStringHashMap sText; +}; + + + + +class ParserQueue; + + + +/// Purpose: holds information of data to merge +class MergeEntrys +{ +friend class MergeDataFile; +private: + OStringHashMap sText; + OStringBoolHashMap bTextFirst; + OStringHashMap sQuickHelpText; + OStringBoolHashMap bQuickHelpTextFirst; + OStringHashMap sTitle; + OStringBoolHashMap bTitleFirst; + +public: + MergeEntrys(){}; + void InsertEntry(const OString &rId, const OString &rText, + const OString &rQuickHelpText, const OString &rTitle) + { + + sText[ rId ] = rText; + bTextFirst[ rId ] = true; + sQuickHelpText[ rId ] = rQuickHelpText; + bQuickHelpTextFirst[ rId ] = true; + sTitle[ rId ] = rTitle; + bTitleFirst[ rId ] = true; + } + bool GetText( OString &rReturn, const OString &nLangIndex, bool bDel = false ); + + /** + Generate QTZ string with 
ResData + For executable which works one language and without PO files. + */ + static OString GetQTZText(const ResData& rResData, std::string_view rOrigText); + +}; + + + +/// Purpose: holds information of data to merge, read from PO file +class MergeDataFile +{ + private: + std::unordered_map<OString, std::unique_ptr<MergeEntrys>> aMap; + std::set<OString> aLanguageSet; + + MergeEntrys *GetMergeData( ResData *pResData , bool bCaseSensitive = false ); + void InsertEntry(std::string_view rTYP, std::string_view rGID, + std::string_view rLID, const OString &nLang, + const OString &rTEXT, const OString &rQHTEXT, + const OString &rTITLE, std::string_view sFilename, + bool bFirstLang, bool bCaseSensitive); + public: + explicit MergeDataFile( + const OString &rFileName, std::string_view rFile, + bool bCaseSensitive, bool bWithQtz = true ); + ~MergeDataFile(); + + + std::vector<OString> GetLanguages() const; + + MergeEntrys *GetMergeEntrys( ResData *pResData ); + MergeEntrys *GetMergeEntrysCaseSensitive( ResData *pResData ); + + static OString CreateKey(std::string_view rTYP, std::string_view rGID, + std::string_view rLID, std::string_view rFilename, bool bCaseSensitive); +}; + + +#endif // INCLUDED_L10NTOOLS_INC_EXPORT_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/helper.hxx b/l10ntools/inc/helper.hxx new file mode 100644 index 0000000000..c1347db04c --- /dev/null +++ b/l10ntools/inc/helper.hxx @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +// Helper string methods + +#ifndef INCLUDED_L10NTOOLS_INC_HELPER_HXX +#define INCLUDED_L10NTOOLS_INC_HELPER_HXX + +#include <sal/config.h> + +#include <string_view> + +#include <libxml/xmlstring.h> + +#include <rtl/string.hxx> + + +namespace helper { + +/// Escape all given characters in the text +OString escapeAll( + std::string_view rText, std::string_view rUnEscaped, std::string_view rEscaped ); +/// Unescape all given characters in the text +OString unEscapeAll( + std::string_view rText, std::string_view rEscaped, std::string_view rUnEscaped ); + +/// Convert special characters to XML entity references +OString QuotHTML( std::string_view rString ); +/// Convert XML entity references to single characters +OString UnQuotHTML( std::string_view rString ); + +/// Check whether text is a valid XML expression +bool isWellFormedXML( std::string_view text ); + +/// Convert xmlChar* to OString +OString xmlStrToOString( const xmlChar* pString ); + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/helpmerge.hxx b/l10ntools/inc/helpmerge.hxx new file mode 100644 index 0000000000..9da891e6b0 --- /dev/null +++ b/l10ntools/inc/helpmerge.hxx @@ -0,0 +1,66 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership.
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_HELPMERGE_HXX +#define INCLUDED_L10NTOOLS_INC_HELPMERGE_HXX + +#include <sal/config.h> + +#include <string_view> + +#include "export.hxx" +#include "xmlparse.hxx" + +#include <rtl/string.hxx> + +/// This class is responsible for extracting/merging OpenOffice XML help files +class HelpParser +{ +private: + OString sHelpFile; + +#if OSL_DEBUG_LEVEL > 2 + /// Debug method, prints the content of the map to stdout + static void Dump(LangHashMap* rElem_in , const OString & sKey_in); + + /// Debug method, prints the content of the map to stdout + static void Dump(XMLHashMap* rElem_in); +#endif + +public: + HelpParser( OString sHelpFile ); + +/// Method appends the content of a parsed XML file to a PO file +/// @PRECOND sHelpFile is valid + static bool CreatePO( const OString &rPOFile_in, const OString &sHelpFile, + XMLFile *pXmlFile, std::string_view rGsi1 ); + +/// Method merges the String into XMLfile, which must point to an existing file.
+ bool Merge( const OString &rDestinationFile_in , + const OString& sLanguage , MergeDataFile* pMergeDataFile ); + +private: + void MergeSingleFile( XMLFile* file , MergeDataFile* pMergeDataFile , const OString& sLanguage , OString const & sPath ); + + static void ProcessHelp( LangHashMap* aLangHM , const OString& sCur , ResData *pResData , MergeDataFile* pMergeDataFile ); +}; + +#endif // INCLUDED_L10NTOOLS_INC_HELPMERGE_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/lngmerge.hxx b/l10ntools/inc/lngmerge.hxx new file mode 100644 index 0000000000..430fdd1393 --- /dev/null +++ b/l10ntools/inc/lngmerge.hxx @@ -0,0 +1,63 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_LNGMERGE_HXX +#define INCLUDED_L10NTOOLS_INC_LNGMERGE_HXX + +#include <sal/config.h> + +#include <vector> + +#include "export.hxx" +#include "po.hxx" + +#define LNG_OK 0x0000 +#define LNG_COULD_NOT_OPEN 0x0001 + + +/** + Class for localization of *.ulf files + + Parse *.ulf files, extract translatable strings + and merge translated strings. 
+*/ +class LngParser +{ +private: + std::vector<OString> mvLines; + OString sSource; + std::vector<OString> aLanguages; + + static bool isNextGroup(OString &sGroup_out, std::string_view sLine_in); + static void ReadLine(std::string_view rLine_in, + OStringHashMap &rText_inout); + static void WritePO(PoOfstream &aPOStream, OStringHashMap &rText_inout, + const OString &rActFileName, const OString &rID); +public: + LngParser(OString sLngFile); + ~LngParser(); + + void CreatePO( const OString &rPOFile ); + void Merge(const OString &rPOFile, const OString &rDestinationFile, + std::string_view rLanguage ); +}; + +#endif // INCLUDED_L10NTOOLS_INC_LNGMERGE_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/po.hxx b/l10ntools/inc/po.hxx new file mode 100644 index 0000000000..9b33d2b778 --- /dev/null +++ b/l10ntools/inc/po.hxx @@ -0,0 +1,152 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_L10NTOOLS_INC_PO_HXX +#define INCLUDED_L10NTOOLS_INC_PO_HXX + +#include <fstream> +#include <memory> +#include <string_view> + +#include <rtl/string.hxx> + +class PoOfstream; +class PoIfstream; +class GenPoEntry; + + +/** Interface to use po entries in localization + + PoEntry based on GenPoEntry class which stores attributes + of general po entry(see po.cxx). It makes easy to get/set + all information needed to localize one english(US) string. + It contains some basic checkings and some string + transformations between po string and string used by + localization tools. 
+*/ +class PoEntry +{ +private: + + std::unique_ptr<GenPoEntry> m_pGenPo; + bool m_bIsInitialized; + +public: + + friend class PoOfstream; + friend class PoIfstream; + + enum TYPE { TTEXT, TQUICKHELPTEXT, TTITLE }; + enum Exception { NOSOURCFILE, NORESTYPE, NOGROUPID, NOSTRING, WRONGHELPTEXT }; + + PoEntry(); + PoEntry( std::string_view rSourceFile, std::string_view rResType, std::string_view rGroupId, + std::string_view rLocalId, std::string_view rHelpText, const OString& rText, + const TYPE eType ); + ~PoEntry(); + + PoEntry( const PoEntry& rPo ); + PoEntry& operator=( const PoEntry& rPo ); + PoEntry& operator=( PoEntry&& rPo ) noexcept; + + OString const & getSourceFile() const; ///< Get name of file from which entry is extracted + OString getGroupId() const; + OString getLocalId() const; + OString getResourceType() const; ///< Get the type of component from which entry is extracted + TYPE getType() const; ///< Get the type of entry + OString const & getMsgCtxt() const; + OString const & getMsgId() const; + OString const & getMsgStr() const; + bool isFuzzy() const; + + /// Check whether po-s belong to the same localization component + static bool IsInSameComp(const PoEntry& rPo1,const PoEntry& rPo2); + static OString genKeyId(const OString& rGenerator); + +}; + +/** Interface to work with header of po/pot files + + This class stores the information which is in the header of + a po file. Its main function is to generate the header for + template po files (pot).
+*/ +class PoHeader +{ +private: + + std::unique_ptr<GenPoEntry> m_pGenPo; + bool m_bIsInitialized; + +public: + + friend class PoOfstream; + friend class PoIfstream; + + PoHeader( std::string_view rExtSrc ); ///< Template Constructor + PoHeader( std::string_view rExtSrc, const OString& rPoHeaderMsgStr ); + ~PoHeader(); + PoHeader(const PoHeader&) = delete; + PoHeader& operator=(const PoHeader&) = delete; +}; + +/// Interface to write po entry to files as output streams +class PoOfstream +{ +private: + + std::ofstream m_aOutPut; + bool m_bIsAfterHeader; + +public: + + enum OpenMode { TRUNC, APP }; + + PoOfstream(); + PoOfstream(const OString& rFileName, OpenMode aMode ); + ~PoOfstream(); + PoOfstream(const PoOfstream&) = delete; + PoOfstream& operator=(const PoOfstream&) = delete; + bool isOpen() const { return m_aOutPut.is_open(); } + + void open(const OString& rFileName, OpenMode aMode = TRUNC ); + void close(); + void writeHeader(const PoHeader& rHeader); + void writeEntry(const PoEntry& rPo); +}; + +/// Interface to read po entry from files as input streams +class PoIfstream +{ +private: + + std::ifstream m_aInPut; + bool m_bEof; + +public: + + class Exception final : public std::exception { }; + + PoIfstream(); + PoIfstream( const OString& rFileName ); + ~PoIfstream(); + PoIfstream(const PoIfstream&) = delete; + PoIfstream& operator=(const PoIfstream&) = delete; + bool isOpen() const { return m_aInPut.is_open(); } + bool eof() const { return m_bEof; } + + void open(const OString& rFileName); + void open(const OString& rFileName, OString& sPoHeader); + void close(); + void readEntry(PoEntry& rPo); +}; + +#endif // INCLUDED_L10NTOOLS_INC_PO_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/propmerge.hxx b/l10ntools/inc/propmerge.hxx new file mode 100644 index 0000000000..cc9764cccb --- /dev/null +++ b/l10ntools/inc/propmerge.hxx @@ -0,0 +1,43 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 
+/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_L10NTOOLS_INC_PROPMERGE_HXX +#define INCLUDED_L10NTOOLS_INC_PROPMERGE_HXX + +#include <rtl/string.hxx> +#include <vector> + +/** + Class for localization of *.properties files + + Parse *.properties files, extract translatable strings + and merge translated strings. +*/ + +class PropParser +{ +private: + std::vector<OString> m_vLines; + OString m_sSource; + OString m_sLang; + bool m_bIsInitialized; + +public: + PropParser( + OString sInputFile, OString sLang, + const bool bMergeMode ); + ~PropParser(); + + bool isInitialized() const { return m_bIsInitialized; } + void Extract( const OString& rPOFile ); + void Merge( const OString &rMergeSrc, const OString &rDestinationFile ); +}; + +#endif // INCLUDED_L10NTOOLS_INC_PROPMERGE_HXX +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/tokens.h b/l10ntools/inc/tokens.h new file mode 100644 index 0000000000..95370cf70a --- /dev/null +++ b/l10ntools/inc/tokens.h @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_TOKENS_H +#define INCLUDED_L10NTOOLS_INC_TOKENS_H + +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +/* Tokens for parsing src files */ +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +#define IGNOREDTOKENS 400 /* #include | #pragma | //... | ... */ +#define COMMENT 401 /*... */ +#define DEFINEDRES 402 /* Text = { */ +#define ANYTOKEN 404 /* XYZ */ +#define UNKNOWNCHAR 407 /* . */ +/*------------------------------------------------------ */ +/* prev. tokens will not be executed */ +#define FILTER_LEVEL 500 +/* following tokens will be executed */ +/*------------------------------------------------------ */ +#define CONDITION 501 /* #if... | #endif ... | ... */ +#define EMPTYLINE 502 /* */ +#define RESOURCE 503 /* Menu MID_TEST */ +#define RESOURCEEXPR 504 /* Menu ( MID_TEST + .. ) */ +#define SMALRESOURCE 505 /* PageItem { */ +#define TEXTLINE 506 /* TEXT = "hhh" */ +#define LONGTEXTLINE 507 /* TEXT = "hhh" TEST "HHH" ... */ +#define TEXT 508 /* "Something like this" */ +#define LEVELUP 509 /* { */ +#define LEVELDOWN 510 /* }; */ +#define APPFONTMAPPING 511 /* MAP_APPFONT(10,10) */ +#define ASSIGNMENT 512 /* Something = Anything */ +#define LISTASSIGNMENT 513 /* ...List [xyz]=... */ +#define LISTTEXT 514 /* < "Text" ... > */ +#define RSCDEFINE 515 /* #define MY_TEXT */ +#define RSCDEFINELEND 516 /* */ +#define PRAGMA 519 /* #pragma ... */ +#define LISTTEXT_ 521 /* { "Text" ... } */ +#define NORMDEFINE 524 /* #define ... 
*/ +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +/* Tokens for parsing cfg files */ +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +#define CFG_TAG 501 +#define CFG_TEXT_START 505 +#define CFG_TEXT_END 506 +#define CFG_TEXTCHAR 507 +#define CFG_CLOSETAG 508 +#define CFG_UNKNOWNTAG 509 +#define CFG_TOKEN_PACKAGE 600 +#define CFG_TOKEN_COMPONENT 601 +#define CFG_TOKEN_CONFIGNAME 602 +#define CFG_TOKEN_TEMPLATE 603 +#define CFG_TOKEN_OORNAME 604 +#define CFG_TOKEN_OORVALUE 605 +#define CFG_TOKEN_NO_TRANSLATE 606 + +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +/* Tokens for parsing xrm files */ +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +#define XRM_TEXT_START 507 +#define XRM_TEXT_END 508 +#define XML_TEXTCHAR 600 + +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +/* Tokens for parsing description.xml files */ +/*------------------------------------------------------ */ +/*------------------------------------------------------ */ +#define DESC_DISPLAY_NAME_START 700 +#define DESC_DISPLAY_NAME_END 701 +#define DESC_TEXT_START 702 +#define DESC_TEXT_END 703 +#define DESC_EXTENSION_DESCRIPTION_START 704 +#define DESC_EXTENSION_DESCRIPTION_END 705 +#define DESC_EXTENSION_DESCRIPTION_SRC 706 + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/treemerge.hxx b/l10ntools/inc/treemerge.hxx new file mode 100644 index 0000000000..7f1a72fc7f --- /dev/null +++ b/l10ntools/inc/treemerge.hxx @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_L10NTOOLS_INC_TREEMERGE_HXX +#define INCLUDED_L10NTOOLS_INC_TREEMERGE_HXX + +#include <libxml/tree.h> +#include <rtl/string.hxx> + +/** + Class for localization of *.tree files + + Parse *.tree files, extract translatable strings, + merge translated strings and update reference and title + of referred help files. +*/ +class TreeParser +{ +private: + xmlDocPtr m_pSource; + OString m_sLang; + bool m_bIsInitialized; + +public: + /// Parse tree file + TreeParser( const OString& rInputFile, OString sLang ); + ~TreeParser(); + + bool isInitialized() const { return m_bIsInitialized; } + /// Export strings + void Extract( const OString& rPOFile ); + /// Merge strings to tree file and update reference to help files(xhp) + void Merge( + const OString &rMergeSrc, const OString &rDestinationFile, + const OString &rXhpRoot ); +}; + +#endif // INCLUDED_L10NTOOLS_INC_TREEMERGE_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/xmlparse.hxx b/l10ntools/inc/xmlparse.hxx new file mode 100644 index 0000000000..8e7d320aff --- /dev/null +++ b/l10ntools/inc/xmlparse.hxx @@ -0,0 +1,360 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_XMLPARSE_HXX +#define INCLUDED_L10NTOOLS_INC_XMLPARSE_HXX + +#include <sal/config.h> + +#include <cstddef> +#include <memory> +#include <utility> +#include <vector> + +#include <expat.h> + +#include <rtl/string.hxx> +#include <rtl/strbuf.hxx> +#include <unordered_map> + +class XMLParentNode; +class XMLElement; + +enum class XMLNodeType{ + XFILE = 0x001, + ELEMENT = 0x002, + DATA = 0x003, + COMMENT = 0x004, + DEFAULT = 0x005 +}; + +/** Holds data of Attributes + */ +class XMLAttribute +{ +private: + OString m_sName; + OString m_sValue; + +public: + /// creates an attribute + XMLAttribute( + OString _sName, // attributes name + OString _sValue // attributes data + ) + : m_sName( std::move(_sName) ), m_sValue( std::move(_sValue) ) {} + + const OString& GetName() const { return m_sName; } + const OString& GetValue() const { return m_sValue; } + + void setValue( const OString &rValue ){ m_sValue = rValue; } +}; + + +typedef std::vector< XMLAttribute* > XMLAttributeList; + +/** Virtual base to handle different kinds of XML nodes + */ +class XMLNode +{ +protected: + XMLNode(){} + +public: + virtual XMLNodeType GetNodeType() const = 0; + virtual ~XMLNode(){} + + XMLNode(XMLNode const &) = default; + XMLNode(XMLNode &&) = default; + XMLNode & operator =(XMLNode const &) = default; + XMLNode & operator =(XMLNode &&) = default; +}; + + +/** Virtual base to handle different kinds of child nodes + */ +class XMLChildNode : public XMLNode +{ +private: + XMLParentNode *m_pParent; + +protected: + XMLChildNode( XMLParentNode *pPar ); + XMLChildNode( const XMLChildNode& rObj); + XMLChildNode& operator=(const XMLChildNode& rObj); +public: + /// returns the parent of this node + XMLParentNode 
*GetParent() { return m_pParent; } +}; + +typedef std::vector< XMLChildNode* > XMLChildNodeList; + +class XMLData; + +/** Virtual base to handle different kinds of parent nodes + */ + +class XMLParentNode : public XMLChildNode +{ +private: + std::unique_ptr<XMLChildNodeList> m_pChildList; + +protected: + XMLParentNode( XMLParentNode *pPar ) + : XMLChildNode( pPar ) {} + + XMLParentNode( const XMLParentNode& ); + + XMLParentNode& operator=(const XMLParentNode& rObj); + virtual ~XMLParentNode() override; + +public: + /// returns child list of this node + XMLChildNodeList *GetChildList() { return m_pChildList.get(); } + + /// adds a new child + void AddChild( + XMLChildNode *pChild /// the new child + ); + + void RemoveAndDeleteAllChildren(); +}; + +/// Mapping numeric Language code <-> XML Element +typedef std::unordered_map<OString, XMLElement*> LangHashMap; + +/// Mapping XML Element string identifier <-> Language Map +typedef std::unordered_map<OString, LangHashMap*> XMLHashMap; + +/** Holds information of a XML file, is root node of tree + */ +class XMLFile final : public XMLParentNode +{ +public: + XMLFile( + OString sFileName // the file name, empty if created from memory stream + ); + XMLFile( const XMLFile& rObj ) ; + virtual ~XMLFile() override; + + void Print( XMLNode *pCur, sal_uInt16 nLevel = 0 ); + void SearchL10NElements( XMLChildNode *pCur ); + void Extract(); + + XMLHashMap* GetStrings(){ return m_pXMLStrings.get(); } + void Write( OString const &rFilename ); + void Write( std::ofstream &rStream, XMLNode *pCur = nullptr ); + + bool CheckExportStatus( XMLChildNode *pCur = nullptr ); + + XMLFile& operator=(const XMLFile& rObj); + + virtual XMLNodeType GetNodeType() const override { return XMLNodeType::XFILE; } + + /// returns file name + const OString& GetName() const { return m_sFileName; } + void SetName( const OString &rFilename ) { m_sFileName = rFilename; } + const std::vector<OString>& getOrder() const { return m_vOrder; } + +private: + + void 
InsertL10NElement( XMLElement* pElement); + + // DATA + OString m_sFileName; + + /// Mapping XML tag names <-> have localizable strings + std::unordered_map<OString, bool> m_aNodes_localize; + + std::unique_ptr<XMLHashMap> m_pXMLStrings; + + std::vector <OString> m_vOrder; +}; + +/// A Utility class for XML +class XMLUtil +{ +public: + /// Quot the XML characters + static OString QuotHTML( const OString& rString ); +}; + + +/** Hold information of an element node + */ +class XMLElement : public XMLParentNode +{ +private: + OString m_sElementName; + std::unique_ptr<XMLAttributeList> m_pAttributes; + +protected: + void Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement) const; +public: + /// create an element node + XMLElement( + OString sName, // the element name + XMLParentNode *pParent // parent node of this element + ); + + virtual ~XMLElement() override; + XMLElement(const XMLElement&); + + XMLElement& operator=(const XMLElement& rObj); + virtual XMLNodeType GetNodeType() const override { return XMLNodeType::ELEMENT; } + + /// returns element name + const OString& GetName() const { return m_sElementName; } + + /// returns list of attributes of this element + XMLAttributeList *GetAttributeList() { return m_pAttributes.get(); } + + /// adds a new attribute to this element, typically used by parser + void AddAttribute( const OString &rAttribute, const OString &rValue ); + + void ChangeLanguageTag( const OString &rValue ); + + /// Return a Unicode String representation of this object + OString ToOString(); +}; + +/** Holds character data + */ +class XMLData : public XMLChildNode +{ +private: + OString m_sData; + +public: + /// create a data node + XMLData( + OString _sData, // the initial data + XMLParentNode *pParent // the parent node of this data, typically an element node + ) : XMLChildNode( pParent ), m_sData( std::move(_sData) ) {} + + // Default copy constructor and copy operator work well. 
+ + virtual XMLNodeType GetNodeType() const override { return XMLNodeType::DATA; } + + /// returns the data + const OString& GetData() const { return m_sData; } + + /// adds new character data to the existing one + void AddData( const OString &rData ) { m_sData += rData; } +}; + +/** Holds comments + */ +class XMLComment final : public XMLChildNode +{ +private: + OString m_sComment; + +public: + /// create a comment node + XMLComment( + OString _sComment, // the comment + XMLParentNode *pParent // the parent node of this comment, typically an element node + ) + : XMLChildNode( pParent ), m_sComment( std::move(_sComment) ) {} + + // Default copy constructor and copy operator work well. + + virtual XMLNodeType GetNodeType() const override { return XMLNodeType::COMMENT; } + + /// returns the comment + const OString& GetComment() const { return m_sComment; } +}; + +/** Holds additional file content like those for which no handler exists + */ +class XMLDefault final : public XMLChildNode +{ +private: + OString m_sDefault; + +public: + /// create a comment node + XMLDefault( + OString _sDefault, // the comment + XMLParentNode *pParent // the parent node of this comment, typically an element node + ) + : XMLChildNode( pParent ), m_sDefault( std::move(_sDefault) ) {} + + // Default copy constructor and copy operator work well. 
+ + virtual XMLNodeType GetNodeType() const override { return XMLNodeType::DEFAULT; } + + /// returns the comment + const OString& GetDefault() const { return m_sDefault; } +}; + +/** struct for error information, used by class SimpleXMLParser + */ +struct XMLError { + XML_Error m_eCode; ///< the error code + std::size_t m_nLine; ///< error line number + std::size_t m_nColumn; ///< error column number + OString m_sMessage; ///< readable error message +}; + +/** validating xml parser, creates a document tree with xml nodes + */ + +class SimpleXMLParser +{ +private: + XML_Parser m_aParser; + XMLError m_aErrorInformation; + + XMLParentNode *m_pCurNode; + XMLData *m_pCurData; + + + static void StartElementHandler( void *userData, const XML_Char *name, const XML_Char **atts ); + static void EndElementHandler( void *userData, const XML_Char *name ); + static void CharacterDataHandler( void *userData, const XML_Char *s, int len ); + static void CommentHandler( void *userData, const XML_Char *data ); + static void DefaultHandler( void *userData, const XML_Char *s, int len ); + + + void StartElement( const XML_Char *name, const XML_Char **atts ); + void EndElement(); + void CharacterData( const XML_Char *s, int len ); + void Comment( const XML_Char *data ); + void Default( const XML_Char *s, int len ); + +public: + /// creates a new parser + SimpleXMLParser(); + ~SimpleXMLParser(); + + /// parse a file, return false on critical errors + bool Execute( + const OString &rFileName, // the file name + XMLFile* pXMLFile // the XMLFile + ); + + /// returns an error struct + const XMLError &GetError() const { return m_aErrorInformation; } +}; + +#endif // INCLUDED_L10NTOOLS_INC_XMLPARSE_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/xrmlex.hxx b/l10ntools/inc/xrmlex.hxx new file mode 100644 index 0000000000..483b6dbaaf --- /dev/null +++ b/l10ntools/inc/xrmlex.hxx @@ -0,0 +1,40 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; 
c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_XRMLEX_HXX +#define INCLUDED_L10NTOOLS_INC_XRMLEX_HXX + +#include <sal/config.h> + +#include <stdio.h> + +extern "C" int WorkOnTokenSet( int, char* ); +extern "C" int InitXrmExport( const char * ); +extern "C" int EndXrmExport(); +extern "C" int GetError(); +extern "C" int SetError(); +extern "C" bool GetOutputFile( int argc, char* argv[]); +extern "C" FILE *GetXrmFile(); +extern "C" const char* getFilename(); + +void yyerror( const char * ); + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/inc/xrmmerge.hxx b/l10ntools/inc/xrmmerge.hxx new file mode 100644 index 0000000000..65b19f7b92 --- /dev/null +++ b/l10ntools/inc/xrmmerge.hxx @@ -0,0 +1,142 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_L10NTOOLS_INC_XRMMERGE_HXX +#define INCLUDED_L10NTOOLS_INC_XRMMERGE_HXX + +#include <memory> +#include <sal/config.h> + +#include <fstream> +#include <string_view> + +#include <rtl/string.hxx> + +#include "export.hxx" +#include "po.hxx" + +/// Parser for *.xrm and description.xml files +class XRMResParser +{ +private: + OString sGID; + + bool bError; + bool bText; + + OString sCurrentOpenTag; + OString sCurrentCloseTag; + OString sCurrentText; +protected: + static OString GetAttribute( const OString &rToken, std::string_view rAttribute ); + static void Error( const OString &rError ); + + virtual void Output( const OString& rOutput )=0; + virtual void WorkOnDesc( + const OString &rOpenTag, + OString &rText + )=0; + virtual void WorkOnText( + const OString &rOpenTag, + OString &rText + )=0; + virtual void EndOfText( + const OString &rOpenTag, + const OString &rCloseTag + )=0; + + const OString& GetGID() const { return sGID; } + +public: + XRMResParser(); + virtual ~XRMResParser(); + + void Execute( int nToken, char * pToken ); + + void SetError() { bError = true; } + bool GetError() const { return bError; } +}; + + +/// Export strings from *.xrm and description.xml files +class XRMResExport final : public XRMResParser +{ +private: + std::unique_ptr<ResData> pResData; + OString sPath; + PoOfstream pOutputStream; + + void WorkOnDesc( + const OString &rOpenTag, + OString &rText + ) override; + void 
WorkOnText( + const OString &rOpenTag, + OString &rText + ) override; + void EndOfText( + const OString &rOpenTag, + const OString &rCloseTag + ) override; + void Output( const OString& rOutput ) override; + +public: + XRMResExport( + const OString &rOutputFile, + OString sFilePath + ); + virtual ~XRMResExport() override; +}; + + +/// Merge strings to *.xrm and description.xml files +class XRMResMerge final : public XRMResParser +{ +private: + std::unique_ptr<MergeDataFile> pMergeDataFile; + OString sFilename; + std::unique_ptr<ResData> pResData; + std::ofstream pOutputStream; + std::vector<OString> aLanguages; + + void WorkOnDesc( + const OString &rOpenTag, + OString &rText + ) override; + void WorkOnText( + const OString &rOpenTag, + OString &rText + ) override; + void EndOfText( + const OString &rOpenTag, + const OString &rCloseTag + ) override; + void Output( const OString& rOutput ) override; +public: + XRMResMerge( + const OString &rMergeSource, + const OString &rOutputFile, + OString sFilename + ); + virtual ~XRMResMerge() override; +}; + +#endif // INCLUDED_L10NTOOLS_INC_XRMMERGE_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/cfg_yy_wrapper.c b/l10ntools/source/cfg_yy_wrapper.c new file mode 100644 index 0000000000..485ce0ab6b --- /dev/null +++ b/l10ntools/source/cfg_yy_wrapper.c @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +// Helper to suppress warnings in lex generated c code, see #i57362# +#include "cfg_yy.c" + +void (*avoid_unused_yyunput_in_cfg_yy_c)() = yyunput; +int (*avoid_unused_yy_flex_strlen_in_cfg_yy_c)() = yy_flex_strlen; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/cfglex.l b/l10ntools/source/cfglex.l new file mode 100644 index 0000000000..1bac5a32d5 --- /dev/null +++ b/l10ntools/source/cfglex.l @@ -0,0 +1,165 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +%{ + +/* + * lexer for parsing cfg source files + */ + +#include <sal/config.h> + +/* enlarge token buffer to tokenize whole strings */ +#undef YYLMAX +#define YYLMAX 64000 + +/* to enable debug output define LEXDEBUG */ +#define LEXDEBUG 1 +#ifdef LEXDEBUG +#define OUTPUT fprintf +#else +#define OUTPUT(Par1,Par2); +#endif + +/* table of possible token ids */ +#include <tokens.h> +#include <stdlib.h> +#include <stdio.h> + +#include <sal/main.h> + +#include <cfglex.hxx> + +#define YY_NO_UNISTD_H + +static int yycolumn = 1; +#define YY_USER_ACTION yycolumn += yyleng; + +static int bText=0; +%} + +%option yylineno +%option nounput +%option never-interactive + +%p 24000 +%e 1200 +%n 500 + +%% + +\<[^\>]*"xml:lang="\""x-no-translate"\"[^\<]*\/\> { + bText = 0; + workOnTokenSet( CFG_TOKEN_NO_TRANSLATE, yytext ); +} + +\<.*\/\> { + bText = 0; + workOnTokenSet( ANYTOKEN, yytext ); +} + +\<[^\>]*"xml:lang="\".*\"[^\<]*\> { + bText = 1; + workOnTokenSet( CFG_TEXT_START, yytext ); +} + + +\<[^\/\!][^\>]*\> { + bText = 0; + workOnTokenSet( CFG_TAG, yytext ); +} + +"<!"DOCTYPE[^\>]*\> { + bText = 0; + workOnTokenSet( CFG_TAG, yytext ); +} + + +\<\!\-\- { + char c1 = 0, c2 = 0; + int c3 = yyinput(); + char pChar[2]; + pChar[1] = 0x00; + pChar[0] = c3; + + workOnTokenSet( COMMENT, yytext ); + workOnTokenSet( COMMENT, pChar ); + + for(;;) { + if ( c3 == EOF ) + break; + if ( c1 == '-' && c2 == '-' && c3 == '>' ) + break; + c1 = c2; + c2 = c3; + c3 = yyinput(); + + pChar[0] = c3; + workOnTokenSet( COMMENT, pChar ); + } +} + +\<\/[^\>]*\> { + bText = 0; + workOnTokenSet( CFG_CLOSETAG, yytext ); +} + +\<[^\>\!]*\> { + bText = 0; + if ( yytext[ 1 ] == '!' 
&& yytext[ 2 ] == '-' && yytext[ 3 ] == '-' ) + workOnTokenSet( COMMENT, yytext ); + else + workOnTokenSet( CFG_UNKNOWNTAG, yytext ); +} + +.|\n { + yycolumn = 1; + if ( bText == 1 ) + workOnTokenSet( CFG_TEXTCHAR, yytext ); + else + workOnTokenSet( UNKNOWNCHAR, yytext ); +} + + +%% + +/*****************************************************************************/ +int yywrap(void) +/*****************************************************************************/ +{ + return 1; +} + +/*****************************************************************************/ +void yyerror ( const char *s ) +/*****************************************************************************/ +{ + /* write error to stderr */ + fprintf( stderr, + "Error: \"%s\" in line %d, column %d: \"%s\"\n", s, yylineno, yycolumn, yytext ); + exit(EXIT_FAILURE); +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { + yyin = init(argc, argv); + yylex(); + return EXIT_SUCCESS; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/l10ntools/source/cfgmerge.cxx b/l10ntools/source/cfgmerge.cxx new file mode 100644 index 0000000000..f1afb41f0c --- /dev/null +++ b/l10ntools/source/cfgmerge.cxx @@ -0,0 +1,500 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cfglex.hxx> +#include <common.hxx> + +#include <cstdio> +#include <cstdlib> +#include <iostream> +#include <memory> +#include <rtl/strbuf.hxx> +#include <o3tl/string_view.hxx> + +#include <helper.hxx> +#include <export.hxx> +#include <cfgmerge.hxx> +#include <utility> +#include <tokens.h> + +namespace { + +namespace global { + +OString inputPathname; +std::unique_ptr< CfgParser > parser; + +} +} + +extern "C" { + +FILE * init(int argc, char ** argv) { + + common::HandledArgs aArgs; + if ( !common::handleArguments(argc, argv, aArgs) ) + { + common::writeUsage("cfgex"_ostr,"*.xcu"_ostr); + std::exit(EXIT_FAILURE); + } + global::inputPathname = aArgs.m_sInputFile; + + FILE * pFile = std::fopen(global::inputPathname.getStr(), "r"); + if (pFile == nullptr) { + std::fprintf( + stderr, "Error: Cannot open file \"%s\"\n", + global::inputPathname.getStr() ); + std::exit(EXIT_FAILURE); + } + + if (aArgs.m_bMergeMode) { + global::parser.reset( + new CfgMerge( + aArgs.m_sMergeSrc, aArgs.m_sOutputFile, + global::inputPathname, aArgs.m_sLanguage )); + } else { + global::parser.reset( + new CfgExport( + aArgs.m_sOutputFile, global::inputPathname )); + } + + return pFile; +} + +void workOnTokenSet(int nTyp, char * pTokenText) { + global::parser->Execute( nTyp, pTokenText ); +} + +} + + + + +CfgStackData* CfgStack::Push(const OString &rTag, const OString &rId) +{ + CfgStackData *pD = new CfgStackData( rTag, rId ); + maList.push_back( pD ); + return pD; +} + + + + +CfgStack::~CfgStack() +{ +} + +OString CfgStack::GetAccessPath( size_t nPos ) +{ + OStringBuffer sReturn; + for (size_t i = 0; i <= nPos; ++i) + { + if (i) + sReturn.append('.'); + sReturn.append(maList[i]->GetIdentifier()); + } + 
+ return sReturn.makeStringAndClear(); +} + +CfgStackData *CfgStack::GetStackData() +{ + if (!maList.empty()) + return maList[maList.size() - 1]; + else + return nullptr; +} + + + + +CfgParser::CfgParser() + : pStackData( nullptr ), + bLocalize( false ) +{ +} + +CfgParser::~CfgParser() +{ + // CfgParser::ExecuteAnalyzedToken pushes onto aStack some XML entities (like XML and document + // type declarations) that don't have corresponding closing tags, so will never be popped off + // aStack again. But not pushing them onto aStack in the first place would change the + // identifiers computed in CfgStack::GetAccessPath, which could make the existing translation + // mechanisms fail. So, for simplicity, and short of more thorough input error checking, take + // into account here all the patterns of such declarations encountered during a build and during + // `make translations` (some inputs start with no such declarations at all, some inputs start + // with an XML declaration, and some inputs start with an XML declaration followed by a document + // type declaration) and pop any corresponding remaining excess elements off aStack: + if (aStack.size() == 2 && aStack.GetStackData()->GetTagType() == "!DOCTYPE") { + aStack.Pop(); + } + if (aStack.size() == 1 && aStack.GetStackData()->GetTagType() == "?xml") { + aStack.Pop(); + } +} + +bool CfgParser::IsTokenClosed(std::string_view rToken) +{ + return rToken[rToken.size() - 2] == '/'; +} + +void CfgParser::AddText( + OString &rText, + const OString &rIsoLang, + const OString &rResTyp ) +{ + rText = rText.replaceAll(OString('\n'), OString()). + replaceAll(OString('\r'), OString()). 
+ replaceAll(OString('\t'), OString()); + pStackData->sResTyp = rResTyp; + WorkOnText( rText, rIsoLang ); + pStackData->sText[ rIsoLang ] = rText; +} + +#if defined _MSC_VER +#pragma warning(disable: 4702) // unreachable code, bug in MSVC2015, it thinks the std::exit is unreachable +#endif +void CfgParser::ExecuteAnalyzedToken( int nToken, char *pToken ) +{ + OString sToken( pToken ); + + if ( sToken == " " || sToken == "\t" ) + sLastWhitespace += sToken; + + OString sTokenName; + + bool bOutput = true; + + switch ( nToken ) { + case CFG_TOKEN_PACKAGE: + case CFG_TOKEN_COMPONENT: + case CFG_TOKEN_TEMPLATE: + case CFG_TOKEN_CONFIGNAME: + case CFG_TOKEN_OORNAME: + case CFG_TOKEN_OORVALUE: + case CFG_TAG: + case ANYTOKEN: + case CFG_TEXT_START: + { + sTokenName = sToken.getToken(1, '<').getToken(0, '>'). + getToken(0, ' '); + + if ( !IsTokenClosed( sToken )) { + OString sSearch; + switch ( nToken ) { + case CFG_TOKEN_PACKAGE: + sSearch = "package-id="_ostr; + break; + case CFG_TOKEN_COMPONENT: + sSearch = "component-id="_ostr; + break; + case CFG_TOKEN_TEMPLATE: + sSearch = "template-id="_ostr; + break; + case CFG_TOKEN_CONFIGNAME: + sSearch = "cfg:name="_ostr; + break; + case CFG_TOKEN_OORNAME: + sSearch = "oor:name="_ostr; + bLocalize = true; + break; + case CFG_TOKEN_OORVALUE: + sSearch = "oor:value="_ostr; + break; + case CFG_TEXT_START: { + if ( sCurrentResTyp != sTokenName ) { + WorkOnResourceEnd(); + } + sCurrentResTyp = sTokenName; + + OString sTemp = sToken.copy( sToken.indexOf( "xml:lang=" )); + sCurrentIsoLang = sTemp.getToken(1, '"'); + + if ( sCurrentIsoLang == NO_TRANSLATE_ISO ) + bLocalize = false; + + pStackData->sTextTag = sToken; + + sCurrentText = ""_ostr; + } + break; + } + OString sTokenId; + if ( !sSearch.isEmpty()) + { + OString sTemp = sToken.copy( sToken.indexOf( sSearch )); + sTokenId = sTemp.getToken(1, '"'); + } + pStackData = aStack.Push( sTokenName, sTokenId ); + + if ( sSearch == "cfg:name=" ) { + OString sTemp( sToken.toAsciiUpperCase() 
); + bLocalize = sTemp.indexOf("CFG:TYPE=\"STRING\"")>=0 + && sTemp.indexOf( "CFG:LOCALIZED=\"TRUE\"" )>=0; + } + } + else if ( sTokenName == "label" ) { + if ( sCurrentResTyp != sTokenName ) { + WorkOnResourceEnd(); + } + sCurrentResTyp = sTokenName; + } + } + break; + case CFG_CLOSETAG: + { + sTokenName = sToken.getToken(1, '/').getToken(0, '>'). + getToken(0, ' '); + if ( aStack.GetStackData() && ( aStack.GetStackData()->GetTagType() == sTokenName )) + { + if (sCurrentText.isEmpty()) + WorkOnResourceEnd(); + aStack.Pop(); + pStackData = aStack.GetStackData(); + } + else + { + const OString sError{ "Misplaced close tag: " + sToken + " in file " + global::inputPathname }; + yyerror(sError.getStr()); + std::exit(EXIT_FAILURE); + } + } + break; + + case CFG_TEXTCHAR: + sCurrentText += sToken; + bOutput = false; + break; + + case CFG_TOKEN_NO_TRANSLATE: + bLocalize = false; + break; + } + + if ( !sCurrentText.isEmpty() && nToken != CFG_TEXTCHAR ) + { + AddText( sCurrentText, sCurrentIsoLang, sCurrentResTyp ); + Output( sCurrentText ); + sCurrentText.clear(); + pStackData->sEndTextTag = sToken; + } + + if ( bOutput ) + Output( sToken ); + + if ( sToken != " " && sToken != "\t" ) + sLastWhitespace = ""_ostr; +} + +void CfgExport::Output(const OString&) +{ +} + +void CfgParser::Execute( int nToken, char * pToken ) +{ + OString sToken( pToken ); + + switch ( nToken ) { + case CFG_TAG: + if ( sToken.indexOf( "package-id=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_PACKAGE, pToken ); + return; + } else if ( sToken.indexOf( "component-id=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_COMPONENT, pToken ); + return; + } else if ( sToken.indexOf( "template-id=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_TEMPLATE, pToken ); + return; + } else if ( sToken.indexOf( "cfg:name=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_OORNAME, pToken ); + return; + } else if ( sToken.indexOf( "oor:name=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_OORNAME, pToken ); + return; + } 
else if ( sToken.indexOf( "oor:value=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_OORVALUE, pToken ); + return; + } + break; + } + ExecuteAnalyzedToken( nToken, pToken ); +} + + + + +CfgExport::CfgExport( + const OString &rOutputFile, + OString sFilePath ) + : sPath(std::move( sFilePath )) +{ + pOutputStream.open( rOutputFile, PoOfstream::APP ); + if (!pOutputStream.isOpen()) + { + std::cerr << "ERROR: Unable to open output file: " << rOutputFile << "\n"; + std::exit(EXIT_FAILURE); + } +} + +CfgExport::~CfgExport() +{ + pOutputStream.close(); +} + + +void CfgExport::WorkOnResourceEnd() +{ + if ( !bLocalize ) + return; + + if ( pStackData->sText["en-US"_ostr].isEmpty() ) + return; + + OString sXComment = pStackData->sText["x-comment"_ostr]; + OString sLocalId = pStackData->sIdentifier; + OString sGroupId; + if ( aStack.size() == 1 ) { + sGroupId = sLocalId; + sLocalId = ""_ostr; + } + else { + sGroupId = aStack.GetAccessPath( aStack.size() - 2 ); + } + + + OString sText = pStackData->sText[ "en-US"_ostr ]; + sText = helper::UnQuotHTML( sText ); + + common::writePoEntry( + "Cfgex"_ostr, pOutputStream, sPath, pStackData->sResTyp, + sGroupId, sLocalId, sXComment, sText); +} + +void CfgExport::WorkOnText( + OString &rText, + const OString &rIsoLang +) +{ + if( !rIsoLang.isEmpty() ) rText = helper::UnQuotHTML( rText ); +} + + + + +CfgMerge::CfgMerge( + const OString &rMergeSource, const OString &rOutputFile, + OString _sFilename, const OString &rLanguage ) + : sFilename(std::move( _sFilename )), + bEnglish( false ) +{ + pOutputStream.open( + rOutputFile.getStr(), std::ios_base::out | std::ios_base::trunc); + if (!pOutputStream.is_open()) + { + std::cerr << "ERROR: Unable to open output file: " << rOutputFile << "\n"; + std::exit(EXIT_FAILURE); + } + + if (!rMergeSource.isEmpty()) + { + pMergeDataFile.reset(new MergeDataFile( + rMergeSource, global::inputPathname, true )); + if (rLanguage.equalsIgnoreAsciiCase("ALL") ) + { + aLanguages = pMergeDataFile->GetLanguages(); 
+ } + else aLanguages.push_back(rLanguage); + } + else + aLanguages.push_back(rLanguage); +} + +CfgMerge::~CfgMerge() +{ + pOutputStream.close(); +} + +void CfgMerge::WorkOnText(OString &, const OString& rLangIndex) +{ + if ( !(pMergeDataFile && bLocalize) ) + return; + + if ( !pResData ) { + OString sLocalId = pStackData->sIdentifier; + OString sGroupId; + if ( aStack.size() == 1 ) { + sGroupId = sLocalId; + sLocalId.clear(); + } + else { + sGroupId = aStack.GetAccessPath( aStack.size() - 2 ); + } + + pResData.reset( new ResData( sGroupId, sFilename ) ); + pResData->sId = sLocalId; + pResData->sResTyp = pStackData->sResTyp; + } + + if (rLangIndex.equalsIgnoreAsciiCase("en-US")) + bEnglish = true; +} + +void CfgMerge::Output(const OString& rOutput) +{ + pOutputStream << rOutput; +} + +void CfgMerge::WorkOnResourceEnd() +{ + + if ( pMergeDataFile && pResData && bLocalize && bEnglish ) { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrysCaseSensitive( pResData.get() ); + if ( pEntrys ) { + OString sCur; + + for( size_t i = 0; i < aLanguages.size(); ++i ){ + sCur = aLanguages[ i ]; + + OString sContent; + pEntrys->GetText( sContent, sCur, true ); + if ( + ( !sCur.equalsIgnoreAsciiCase("en-US") ) && !sContent.isEmpty()) + { + OString sTextTag = pStackData->sTextTag; + const sal_Int32 nLangAttributeStart{ sTextTag.indexOf( "xml:lang=" ) }; + const sal_Int32 nLangStart{ sTextTag.indexOf( '"', nLangAttributeStart )+1 }; + const sal_Int32 nLangEnd{ sTextTag.indexOf( '"', nLangStart ) }; + OString sAdditionalLine{ "\t" + + sTextTag.replaceAt(nLangStart, nLangEnd-nLangStart, sCur) + + helper::QuotHTML(sContent) + + pStackData->sEndTextTag + + "\n" + + sLastWhitespace }; + Output( sAdditionalLine ); + } + } + } + } + pResData.reset(); + bEnglish = false; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/common.cxx b/l10ntools/source/common.cxx new file mode 100644 index 0000000000..4cc9ba6405 --- /dev/null +++ 
b/l10ntools/source/common.cxx @@ -0,0 +1,161 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <iostream> + +#include <common.hxx> + +namespace { + +//flags for handleArguments() +enum class State { + NONE, Input, Output, MergeSrc, Languages +}; + +} + +namespace common { + +bool handleArguments( + int argc, char * argv[], HandledArgs& o_aHandledArgs) +{ + o_aHandledArgs = HandledArgs(); + State nState = State::NONE; + + for( int i = 1; i < argc; i++ ) + { + if ( OString( argv[ i ] ).toAsciiUpperCase() == "-I" ) + { + nState = State::Input; // next token specifies source file + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-O" ) + { + nState = State::Output; // next token specifies the dest file + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-M" ) + { + nState = State::MergeSrc; // next token specifies the merge database + o_aHandledArgs.m_bMergeMode = true; + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-L" ) + { + nState = State::Languages; + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-B" ) + { + o_aHandledArgs.m_bUTF8BOM = true; + } + else + { + switch ( nState ) + { + case State::NONE: + { + return false; // no valid command line + } + case State::Input: + { + o_aHandledArgs.m_sInputFile = OString( argv[i] ); + } + break; + case State::Output: + { + o_aHandledArgs.m_sOutputFile = OString( argv[i] ); + } + break; + case State::MergeSrc: + { + o_aHandledArgs.m_sMergeSrc = OString( argv[i] ); + } + break; + case State::Languages: + { + o_aHandledArgs.m_sLanguage = OString( argv[i] ); + } + break; + } + } + } + if( !o_aHandledArgs.m_sInputFile.isEmpty() && + !o_aHandledArgs.m_sOutputFile.isEmpty() ) + { + 
return true; + } + else + { + o_aHandledArgs = HandledArgs(); + return false; + } +} + +void writeUsage(const OString& rName, const OString& rFileType) +{ + std::cout + << " Syntax: " << rName + << " -i FileIn -o FileOut [-m DataBase] [-l Lang] [-b]\n" + " FileIn: Source files (" << rFileType << ")\n" + " FileOut: Destination file (*.*)\n" + " DataBase: Mergedata (*.po)\n" + " Lang: Restrict the handled language; one element of\n" + " (de, en-US, ...) or all\n" + " -b: Add UTF-8 Byte Order Mark to FileOut(use with -m option)\n"; +} + +void writePoEntry( + const OString& rExecutable, PoOfstream& rPoStream, const OString& rSourceFile, + std::string_view rResType, const OString& rGroupId, const OString& rLocalId, + const OString& rHelpText, const OString& rText, const PoEntry::TYPE eType ) +{ + try + { + PoEntry aPO(rSourceFile, rResType, rGroupId, rLocalId, rHelpText, rText, eType); + rPoStream.writeEntry( aPO ); + } + catch( PoEntry::Exception& aException ) + { + if(aException == PoEntry::NOSOURCFILE) + { + std::cerr << rExecutable << " warning: no sourcefile specified for po entry\n"; + } + else + { + std::cerr << rExecutable << " warning: invalid po attributes extracted from " << rSourceFile << "\n"; + if(aException == PoEntry::NOGROUPID) + { + std::cerr << "No groupID specified!\n"; + std::cerr << "String: " << rText << "\n"; + } + else if (aException == PoEntry::NOSTRING) + { + std::cerr << "No string specified!\n"; + std::cerr << "GroupID: " << rGroupId << "\n"; + if( !rLocalId.isEmpty() ) std::cerr << "LocalID: " << rLocalId << "\n"; + } + else + { + if (aException == PoEntry::NORESTYPE) + { + std::cerr << "No resource type specified!\n"; + } + else if (aException == PoEntry::WRONGHELPTEXT) + { + std::cerr << "x-comment length is 5 characters:" << rHelpText << "\n"; + } + + std::cerr << "GroupID: " << rGroupId << "\n"; + if( !rLocalId.isEmpty() ) std::cerr << "LocalID: " << rLocalId << "\n"; + std::cerr << "String: " << rText << "\n"; + } + } + } +} + +} + +/* 
vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/helper.cxx b/l10ntools/source/helper.cxx new file mode 100644 index 0000000000..4726234b19 --- /dev/null +++ b/l10ntools/source/helper.cxx @@ -0,0 +1,156 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/config.h> + +#include <libxml/parser.h> + +#include <o3tl/safeint.hxx> +#include <o3tl/string_view.hxx> +#include <rtl/strbuf.hxx> + +#include <helper.hxx> + +namespace helper { + +OString escapeAll( + std::string_view rText, std::string_view rUnEscaped, std::string_view rEscaped ) +{ + assert( rEscaped.size() == 2*rUnEscaped.size() ); + OStringBuffer sReturn; + for ( size_t nIndex = 0; nIndex < rText.size(); ++nIndex ) + { + size_t nUnEscapedOne = rUnEscaped.find(rText[nIndex]); + if( nUnEscapedOne != std::string_view::npos ) + { + sReturn.append(rEscaped.substr(nUnEscapedOne*2,2)); + } + else + sReturn.append(rText[nIndex]); + } + return sReturn.makeStringAndClear(); +} + + +OString unEscapeAll( + std::string_view rText, std::string_view rEscaped, std::string_view rUnEscaped) +{ + assert( rEscaped.size() == 2*rUnEscaped.length() ); + OStringBuffer sReturn; + const size_t nLength = rText.size(); + for ( size_t nIndex = 0; nIndex < nLength; ++nIndex ) + { + if( rText[nIndex] == '\\' && nIndex+1 < nLength ) + { + size_t nEscapedOne = rEscaped.find(rText.substr(nIndex,2)); + if( nEscapedOne != std::string_view::npos ) + { + sReturn.append(rUnEscaped[nEscapedOne/2]); + ++nIndex; + } + else + { + sReturn.append(rText[nIndex]); + } + } + else + sReturn.append(rText[nIndex]); + } + return sReturn.makeStringAndClear(); +} + + +OString QuotHTML(std::string_view rString) +{ + 
OStringBuffer sReturn; + for (size_t i = 0; i < rString.size(); ++i) + { + switch (rString[i]) + { + case '<': + sReturn.append("<"); + break; + case '>': + sReturn.append(">"); + break; + case '"': + sReturn.append("""); + break; + case '\'': + sReturn.append("'"); + break; + case '&': + if (o3tl::starts_with(rString.substr(i), "&")) + sReturn.append('&'); + else + sReturn.append("&"); + break; + default: + sReturn.append(rString[i]); + break; + } + } + return sReturn.makeStringAndClear(); +} + +OString UnQuotHTML( std::string_view rString ) +{ + OStringBuffer sReturn; + for (size_t i = 0; i != rString.size();) { + auto tmp = rString.substr(i); + if (o3tl::starts_with(tmp, "&")) { + sReturn.append('&'); + i += RTL_CONSTASCII_LENGTH("&"); + } else if (o3tl::starts_with(tmp, "<")) { + sReturn.append('<'); + i += RTL_CONSTASCII_LENGTH("<"); + } else if (o3tl::starts_with(tmp, ">")) { + sReturn.append('>'); + i += RTL_CONSTASCII_LENGTH(">"); + } else if (o3tl::starts_with(tmp, """)) { + sReturn.append('"'); + i += RTL_CONSTASCII_LENGTH("""); + } else if (o3tl::starts_with(tmp, "'")) { + sReturn.append('\''); + i += RTL_CONSTASCII_LENGTH("'"); + } else { + sReturn.append(rString[i]); + ++i; + } + } + return sReturn.makeStringAndClear(); +} + +bool isWellFormedXML( std::string_view text ) +{ + xmlDocPtr doc; + bool result = true; + + OString content = OString::Concat("<root>") + text + "</root>"; + doc = xmlParseMemory(content.getStr(),static_cast<int>(content.getLength())); + if (doc == nullptr) { + result = false; + } + xmlFreeDoc(doc); + xmlCleanupParser(); + return result; +} + +//Convert xmlChar* to OString +OString xmlStrToOString( const xmlChar* pString ) +{ + xmlChar* pTemp = xmlStrdup( pString ); + OString sResult = reinterpret_cast<char*>( pTemp ); + xmlFree( pTemp ); + return sResult; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/helpex.cxx b/l10ntools/source/helpex.cxx new file mode 100644 index 
0000000000..97e574fc7d --- /dev/null +++ b/l10ntools/source/helpex.cxx @@ -0,0 +1,142 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <iostream> +#include <string> +#include <cstring> + +#include <sal/main.h> + +#include <helpmerge.hxx> +#include <common.hxx> +#include <memory> + +#ifndef TESTDRIVER + +static void WriteUsage() +{ + std::cout + << (" Syntax: Helpex -[m]i FileIn -o FileOut [-m DataBase] [-l Lang]\n" + " FileIn + i: Source file (*.xhp)\n" + " FileIn + -mi: File including paths of source files" + " (only for merge)\n" + " FileOut: Destination file (*.*) or files (in case of -mi)\n" + " DataBase: Mergedata (*.po)\n" + " Lang: Restrict the handled languages; one element of\n" + " (de, en-US, ...) 
or all\n"); +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + bool hasNoError = true; + try + { + bool bMultiMerge = false; + for (int nIndex = 1; nIndex != argc; ++nIndex) + { + if (std::strcmp(argv[nIndex], "-mi") == 0) + { + argv[nIndex][1] = 'i'; + argv[nIndex][2] = '\0'; + bMultiMerge = true; + break; + } + } + + common::HandledArgs aArgs; + if ( !common::handleArguments( argc, argv, aArgs) ) + { + WriteUsage(); + return 1; + } + + if ( aArgs.m_bMergeMode ) + { + if( bMultiMerge ) + { + std::ifstream aInput( aArgs.m_sInputFile.getStr() ); + if( !aInput.is_open() ) + { + std::cerr << "Helpex error: cannot open input file\n"; + return 1; + } + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( aArgs.m_sLanguage != "qtz") + { + pMergeDataFile.reset(new MergeDataFile(aArgs.m_sMergeSrc, "", false, false )); + } + std::string sTemp; + aInput >> sTemp; + while( !aInput.eof() ) + { + // coverity[tainted_data] - this is a build time tool + const OString sXhpFile( sTemp.data(), static_cast<sal_Int32>(sTemp.length()) ); + HelpParser aParser( sXhpFile ); + const OString sOutput( + aArgs.m_sOutputFile + + sXhpFile.subView( sXhpFile.lastIndexOf('/') )); + if( !aParser.Merge( sOutput, + aArgs.m_sLanguage, pMergeDataFile.get() )) + { + hasNoError = false; + } + aInput >> sTemp; + } + aInput.close(); + } + else + { + HelpParser aParser( aArgs.m_sInputFile ); + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( aArgs.m_sLanguage != "qtz") + { + pMergeDataFile.reset(new MergeDataFile(aArgs.m_sMergeSrc, aArgs.m_sInputFile, false, false )); + } + hasNoError = + aParser.Merge( + aArgs.m_sOutputFile, + aArgs.m_sLanguage, pMergeDataFile.get() ); + } + } + else + { + HelpParser aParser( aArgs.m_sInputFile ); + XMLFile xmlfile( OString('0') ); + hasNoError = + HelpParser::CreatePO( + aArgs.m_sOutputFile, aArgs.m_sInputFile, + &xmlfile, "help" ); + } + } + catch (std::exception& e) + { + std::cerr << "Helpex exception: " << e.what() << std::endl; + hasNoError = true; + } + + if( 
hasNoError ) + return 0; + else + return 1; +} +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/helpmerge.cxx b/l10ntools/source/helpmerge.cxx new file mode 100644 index 0000000000..fa0358a89d --- /dev/null +++ b/l10ntools/source/helpmerge.cxx @@ -0,0 +1,249 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <fstream> + +#include <sal/log.hxx> + +#include <stdio.h> +#include <stdlib.h> +#include <helpmerge.hxx> +#include <utility> +#include <vector> + +#ifdef _WIN32 +#if !defined WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#undef CopyFile +#include <direct.h> +#endif + +#include <export.hxx> +#include <common.hxx> +#include <helper.hxx> +#include <po.hxx> + +#if OSL_DEBUG_LEVEL > 2 +void HelpParser::Dump(XMLHashMap* rElem_in) +{ + for (auto const& pos : *rElem_in) + { + Dump(pos.second,pos.first); + } +} + +void HelpParser::Dump(LangHashMap* rElem_in,const OString & sKey_in) +{ + OString x; + OString y; + fprintf(stdout,"+------------%s-----------+\n",sKey_in.getStr() ); + for (auto const& posn : *rElem_in) + { + x=posn.first; + y=posn.second->ToOString(); + fprintf(stdout,"key=%s value=%s\n",x.getStr(),y.getStr()); + } + fprintf(stdout,"+--------------------------+\n"); +} +#endif + +HelpParser::HelpParser( OString _sHelpFile ) + : sHelpFile(std::move( _sHelpFile )) + {}; + +/*****************************************************************************/ +bool HelpParser::CreatePO( +/*****************************************************************************/ + const OString &rPOFile_in, const OString &sHelpFile, + XMLFile* pXmlFile, std::string_view rGsi1){ + SimpleXMLParser aParser; + //TODO: explicit BOM handling? 
+ + if (!aParser.Execute( sHelpFile, pXmlFile )) + { + printf( + "%s: %s\n", + sHelpFile.getStr(), + aParser.GetError().m_sMessage.getStr()); + exit(-1); + } + pXmlFile->Extract(); + if( !pXmlFile->CheckExportStatus() ){ + return true; + } + + PoOfstream aPoOutput( rPOFile_in, PoOfstream::APP ); + + if (!aPoOutput.isOpen()) { + fprintf(stdout,"Can't open file %s\n",rPOFile_in.getStr()); + return false; + } + + XMLHashMap* aXMLStrHM = pXmlFile->GetStrings(); + + std::vector<OString> order = pXmlFile->getOrder(); + + for (auto const& pos : order) + { + auto posm = aXMLStrHM->find(pos); + LangHashMap* pElem = posm->second; + + XMLElement* pXMLElement = (*pElem)[ "en-US"_ostr ]; + + if( pXMLElement != nullptr ) + { + OString data( + pXMLElement->ToOString(). + replaceAll("\n"_ostr,OString()). + replaceAll("\t"_ostr,OString()).trim()); + + common::writePoEntry( + "Helpex"_ostr, aPoOutput, sHelpFile, rGsi1, + posm->first, OString(), OString(), data); + + pXMLElement=nullptr; + } + else + { + // If this is something totally unexpected, wouldn't an assert() be in order? + // On the other hand, if this is expected, why the printf? + fprintf(stdout,"\nDBG: NullPointer in HelpParser::CreatePO, File %s\n", sHelpFile.getStr()); + } + } + aPoOutput.close(); + + return true; +} + +bool HelpParser::Merge( const OString &rDestinationFile, + const OString& rLanguage , MergeDataFile* pMergeDataFile ) +{ + SimpleXMLParser aParser; + + //TODO: explicit BOM handling? 
+ + XMLFile xmlfile( OString('0') ); + if (!aParser.Execute( sHelpFile, &xmlfile)) + { + SAL_WARN("l10ntools", "could not parse " << sHelpFile); + return false; + } + MergeSingleFile( &xmlfile , pMergeDataFile , rLanguage , rDestinationFile ); + return true; +} + +void HelpParser::MergeSingleFile( XMLFile* file , MergeDataFile* pMergeDataFile , const OString& sLanguage , + OString const & sPath ) +{ + file->Extract(); + + XMLHashMap* aXMLStrHM = file->GetStrings(); + static ResData s_ResData(""_ostr,""_ostr); + s_ResData.sResTyp = "help"_ostr; + + std::vector<OString> order = file->getOrder(); + + for (auto const& pos : order) // Merge every l10n related string in the same order as export + { + auto posm = aXMLStrHM->find(pos); + LangHashMap* aLangHM = posm->second; +#if OSL_DEBUG_LEVEL > 2 + printf("*********************DUMPING HASHMAP***************************************"); + Dump(aXMLStrHM); + printf("DBG: sHelpFile = %s\n",sHelpFile.getStr() ); +#endif + + s_ResData.sGId = posm->first; + s_ResData.sFilename = sHelpFile; + + ProcessHelp( aLangHM , sLanguage, &s_ResData , pMergeDataFile ); + } + + file->Write(sPath); +} + +/* ProcessHelp method: search for en-US entry and replace it with the current language*/ +void HelpParser::ProcessHelp( LangHashMap* aLangHM , const OString& sCur , ResData *pResData , MergeDataFile* pMergeDataFile ){ + + XMLElement* pXMLElement = nullptr; + + if( sCur.equalsIgnoreAsciiCase("en-US") ) + return; + + pXMLElement = (*aLangHM)[ "en-US"_ostr ]; + if( pXMLElement == nullptr ) + { + printf("Error: Can't find en-US entry\n"); + return; + } + + OString sNewText; + OString sNewdata; + OString sSourceText( + pXMLElement->ToOString(). + replaceAll( + "\n"_ostr, + OString()). 
+ replaceAll( + "\t"_ostr, + OString())); + // re-add spaces to the beginning of translated string, + // important for indentation of Basic code examples + sal_Int32 nPreSpaces = 0; + sal_Int32 nLen = sSourceText.getLength(); + while ( (nPreSpaces < nLen) && (sSourceText[nPreSpaces] == ' ') ) + nPreSpaces++; + if( sCur == "qtz" ) + { + sNewText = MergeEntrys::GetQTZText(*pResData, sSourceText); + sNewdata = sNewText; + } + else if( pMergeDataFile ) + { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrys( pResData ); + if( pEntrys != nullptr) + { + pEntrys->GetText( sNewText, sCur, true ); + if (helper::isWellFormedXML(XMLUtil::QuotHTML(sNewText))) + { + sNewdata = sSourceText.subView(0,nPreSpaces) + sNewText; + } + } + } + if (!sNewdata.isEmpty()) + { + XMLData *data = new XMLData( sNewdata , nullptr ); // Add new one + pXMLElement->RemoveAndDeleteAllChildren(); + pXMLElement->AddChild( data ); + aLangHM->erase( sCur ); + } + else + { + SAL_WARN( + "l10ntools", + "Can't find GID=" << pResData->sGId << " TYP=" << pResData->sResTyp); + } + pXMLElement->ChangeLanguageTag(sCur); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/idxdict/idxdict.cxx b/l10ntools/source/idxdict/idxdict.cxx new file mode 100644 index 0000000000..6d2a22b3d0 --- /dev/null +++ b/l10ntools/source/idxdict/idxdict.cxx @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#include <cerrno>
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <stdlib.h>
#include <string.h>

const int MAXLINE = 1024*64;

// Reads a thesaurus data file from standard input and writes an index to the
// file named after -o: the encoding line, the number of words, then one
// "word|offset" line per word in sorted order.
int main(int argc, char *argv[])
{
    const bool bUsageOk = (argc == 3) && (strcmp(argv[1], "-o") == 0);
    if (!bUsageOk)
    {
        std::cout << "Usage: idxdict -o outputfile < input\n";
        ::exit(99);
    }
    // This call improves performance by approx 5x
    std::ios_base::sync_with_stdio(false);

    const char * const outputFile = argv[2];
    char inputBuffer[MAXLINE];
    typedef std::multimap<std::string, size_t> WordIndex;
    WordIndex entries;
    // Position of the previous insertion, passed as hint for the next one.
    WordIndex::iterator hint = entries.begin();

    // The first line names the encoding; offsets count bytes from file start.
    std::cin.getline(inputBuffer, MAXLINE);
    const std::string encoding(inputBuffer);
    size_t currentOffset = encoding.size() + 1;

    for (;;)
    {
        // Extract the next word, but not the entry count
        std::cin.getline(inputBuffer, MAXLINE, '|');
        if (std::cin.eof())
            break;

        const std::string word(inputBuffer);
        hint = entries.insert(hint, std::pair<std::string, size_t>(word, currentOffset));
        currentOffset += word.size() + 1;

        // Next is the entry count
        std::cin.getline(inputBuffer, MAXLINE);
        if (!std::cin.good())
        {
            std::cerr << "Unable to read entry - insufficient buffer?.\n";
            exit(99);
        }
        currentOffset += strlen(inputBuffer) + 1;

        char * endptr;
        errno = 0;
        const int entryCount = strtol(inputBuffer, &endptr, 10);
        const bool bParsed = (errno == 0) && (endptr != inputBuffer) && (*endptr == '\0');
        if (!bParsed)
        {
            std::cerr
                << "Unable to read count from \"" << inputBuffer
                << "\" input.\n";
            exit(99);
        }

        // Skip the entry lines, only accounting for their length.
        int remaining = entryCount;
        while (remaining-- > 0)
        {
            std::cin.getline(inputBuffer, MAXLINE);
            currentOffset += strlen(inputBuffer) + 1;
        }
    }

    // Use binary mode to prevent any translation of LF to CRLF on Windows
    std::ofstream outputStream(
        outputFile,
        std::ios_base::binary | std::ios_base::trunc | std::ios_base::out);
    if (!outputStream.is_open())
    {
        std::cerr << "Unable to open output file " << outputFile << std::endl;
        ::exit(99);
    }

    outputStream << encoding << '\n' << entries.size() << '\n';

    for (WordIndex::const_iterator it = entries.begin(); it != entries.end(); ++it)
    {
        outputStream << it->first << '|' << it->second << '\n';
    }
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
+ */ + +#include <sal/config.h> + +#include <sal/main.h> + +#include <common.hxx> + +#include <lngmerge.hxx> + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + common::HandledArgs aArgs; + if (!common::handleArguments(argc, argv, aArgs)) + { + common::writeUsage("ulfex"_ostr, "*.ulf"_ostr); + return 1; + } + + LngParser aParser(aArgs.m_sInputFile); + if (aArgs.m_bMergeMode) + aParser.Merge(aArgs.m_sMergeSrc, aArgs.m_sOutputFile, aArgs.m_sLanguage); + else + aParser.CreatePO(aArgs.m_sOutputFile); + + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/lngmerge.cxx b/l10ntools/source/lngmerge.cxx new file mode 100644 index 0000000000..4eaa94940c --- /dev/null +++ b/l10ntools/source/lngmerge.cxx @@ -0,0 +1,281 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <o3tl/string_view.hxx> + +#include <cstddef> +#include <iostream> +#include <memory> +#include <string> + +#include <common.hxx> +#include <po.hxx> +#include <lngmerge.hxx> +#include <utility> + +namespace { + +bool lcl_isNextGroup(OString &sGroup_out, std::string_view sLineTrim) +{ + if (o3tl::starts_with(sLineTrim, "[") && o3tl::ends_with(sLineTrim, "]")) + { + sLineTrim = o3tl::getToken(sLineTrim, 1, '['); + sLineTrim = o3tl::getToken(sLineTrim, 0, ']'); + sGroup_out = OString(o3tl::trim(sLineTrim)); + return true; + } + return false; +} + +void lcl_RemoveUTF8ByteOrderMarker( OString &rString ) +{ + if( rString.getLength() >= 3 && rString[0] == '\xEF' && + rString[1] == '\xBB' && rString[2] == '\xBF' ) + { + rString = rString.copy(3); + } +} + +} + + + +LngParser::LngParser(OString sLngFile) + : sSource(std::move( sLngFile )) +{ + std::ifstream aStream(sSource.getStr()); + if (!aStream.is_open()) + return; + + bool bFirstLine = true; + std::string s; + std::getline(aStream, s); + while (!aStream.eof()) + { + OString sLine(s.data(), s.length()); + + if( bFirstLine ) + { + // Always remove UTF8 BOM from the first line + lcl_RemoveUTF8ByteOrderMarker( sLine ); + bFirstLine = false; + } + + mvLines.push_back( sLine ); + std::getline(aStream, s); + } + mvLines.push_back( OString() ); +} + +LngParser::~LngParser() +{ +} + +void LngParser::CreatePO( const OString &rPOFile ) +{ + PoOfstream aPOStream( rPOFile, PoOfstream::APP ); + if (!aPOStream.isOpen()) { + std::cerr << "Ulfex error: Can't open po file:" << rPOFile << "\n"; + } + + size_t nPos = 0; + bool bStart = true; + OString sGroup, sLine; + OStringHashMap Text; + OString sID; + + while( nPos < mvLines.size() ) { + sLine = mvLines[ nPos++ ]; + while( nPos < mvLines.size() && !isNextGroup( sGroup , sLine ) ) { + ReadLine( sLine , Text ); + sID = sGroup; + sLine = mvLines[ nPos++ ]; + } + if( bStart ) { + bStart = false; + sID = sGroup; + } + else { + WritePO( aPOStream , 
Text , sSource , sID ); + } + Text.erase("x-comment"_ostr); + } + aPOStream.close(); +} + +void LngParser::WritePO(PoOfstream &aPOStream, + OStringHashMap &rText_inout, const OString &rActFileName, + const OString &rID) +{ + common::writePoEntry( + "Ulfex"_ostr, aPOStream, rActFileName, "LngText", + rID, OString(), rText_inout.count("x-comment"_ostr) ? rText_inout["x-comment"_ostr] : OString(), rText_inout["en-US"_ostr]); +} + +bool LngParser::isNextGroup(OString &sGroup_out, std::string_view sLine_in) +{ + return lcl_isNextGroup(sGroup_out, o3tl::trim(sLine_in)); +} + +void LngParser::ReadLine(std::string_view rLine_in, + OStringHashMap &rText_inout) +{ + if (!o3tl::starts_with(rLine_in, " *") && !o3tl::starts_with(rLine_in, "/*")) + { + OString sLang(o3tl::trim(o3tl::getToken(rLine_in, 0, '='))); + if (!sLang.isEmpty()) { + OString sText(o3tl::getToken(rLine_in,1, '"')); + rText_inout[sLang] = sText; + } + } +} + +void LngParser::Merge( + const OString &rPOFile, + const OString &rDestinationFile, + std::string_view rLanguage ) +{ + std::ofstream aDestination( + rDestinationFile.getStr(), std::ios_base::out | std::ios_base::trunc); + + MergeDataFile aMergeDataFile( rPOFile, sSource, false, true ); + if( o3tl::equalsIgnoreAsciiCase(rLanguage, "ALL") ) + aLanguages = aMergeDataFile.GetLanguages(); + + size_t nPos = 0; + bool bGroup = false; + OString sGroup; + + // seek to next group + while ( nPos < mvLines.size() && !bGroup ) + bGroup = lcl_isNextGroup(sGroup, o3tl::trim(mvLines[nPos++])); + + while ( nPos < mvLines.size()) { + OStringHashMap Text; + OString sID( sGroup ); + std::size_t nLastLangPos = 0; + + ResData aResData( sID, sSource ); + aResData.sResTyp = "LngText"_ostr; + MergeEntrys *pEntrys = aMergeDataFile.GetMergeEntrys( &aResData ); + // read languages + bGroup = false; + + OString sLanguagesDone; + + while ( nPos < mvLines.size() && !bGroup ) + { + const OString sLine{ mvLines[nPos].trim() }; + if ( lcl_isNextGroup(sGroup, sLine) ) + { + bGroup = 
true; + nPos ++; + sLanguagesDone = ""_ostr; + } + else + { + sal_Int32 n = 0; + OString sLang(sLine.getToken(0, '=', n)); + if (n == -1 || static_cast<bool>(sLine.match("/*"))) + { + ++nPos; + } + else + { + sLang = sLang.trim(); + + OString sSearch{ ";" + sLang + ";" }; + + if ( sLanguagesDone.indexOf( sSearch ) != -1 ) { + mvLines.erase( mvLines.begin() + nPos ); + } + if( pEntrys ) + { + if( !sLang.isEmpty() ) + { + OString sNewText; + pEntrys->GetText( sNewText, sLang, true ); + if( sLang == "qtz" ) + continue; + + if ( !sNewText.isEmpty()) { + mvLines[ nPos ] = sLang + + " = \"" + // escape quotes, unescape double escaped quotes fdo#56648 + + sNewText.replaceAll("\""_ostr,"\\\""_ostr).replaceAll("\\\\\""_ostr,"\\\""_ostr) + + "\""; + Text[ sLang ] = sNewText; + } + } + nLastLangPos = nPos; + nPos ++; + sLanguagesDone += sSearch; + } + else { + nLastLangPos = nPos; + nPos ++; + sLanguagesDone += sSearch; + } + } + } + } + OString sCur; + if ( nLastLangPos ) + { + for(size_t n = 0; n < aLanguages.size(); ++n) + { + sCur = aLanguages[ n ]; + if( !sCur.equalsIgnoreAsciiCase("en-US") && Text[sCur].isEmpty() && pEntrys ) + { + + OString sNewText; + pEntrys->GetText( sNewText, sCur, true ); + if( sCur == "qtz" ) + continue; + if ( !sNewText.isEmpty() && sCur != "x-comment") + { + const OString sLine { sCur + + " = \"" + // escape quotes, unescape double escaped quotes fdo#56648 + + sNewText.replaceAll("\""_ostr,"\\\""_ostr).replaceAll("\\\\\""_ostr,"\\\""_ostr) + + "\"" }; + + nLastLangPos++; + nPos++; + + if ( nLastLangPos < mvLines.size() ) { + mvLines.insert( mvLines.begin() + nLastLangPos, sLine ); + } else { + mvLines.push_back( sLine ); + } + } + } + } + } + } + + for ( size_t i = 0; i < mvLines.size(); ++i ) + aDestination << mvLines[i] << '\n'; + + aDestination.close(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/localize.cxx b/l10ntools/source/localize.cxx new file mode 100644 index 0000000000..96da93e139 --- 
/dev/null +++ b/l10ntools/source/localize.cxx @@ -0,0 +1,525 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cassert> +#include <cstdlib> +#include <iostream> +#include <string> +#include <string_view> +#include <map> +#include <vector> +#include <algorithm> + +#include <o3tl/string_view.hxx> +#include <osl/file.h> +#include <osl/file.hxx> +#include <osl/thread.h> +#include <rtl/string.h> +#include <rtl/string.hxx> +#include <rtl/textcvt.h> +#include <rtl/strbuf.hxx> +#include <rtl/ustring.h> +#include <rtl/ustring.hxx> +#include <sal/macros.h> +#include <sal/main.h> +#include <sal/types.h> + +#include <po.hxx> + +namespace { + +OString libraryPathEnvVarOverride; + +bool matchList( + std::u16string_view rUrl, const std::u16string_view* pList, size_t nLength) +{ + for (size_t i = 0; i != nLength; ++i) { + if (o3tl::ends_with(rUrl, pList[i])) { + return true; + } + } + return false; +} + +bool passesNegativeList(std::u16string_view rUrl) { + static const std::u16string_view list[] = { + u"/desktop/test/deployment/passive/help/en/help.tree", + 
u"/desktop/test/deployment/passive/help/en/main.xhp", + u"/dictionaries.xcu", + u"/dictionaries/da_DK/help/da/help.tree", + (u"/dictionaries/da_DK/help/da/" + "org.openoffice.da.hunspell.dictionaries/page1.xhp"), + (u"/dictionaries/da_DK/help/da/" + "org.openoffice.da.hunspell.dictionaries/page2.xhp"), + u"/dictionaries/hu_HU/help/hu/help.tree", + (u"/dictionaries/hu_HU/help/hu/" + "org.openoffice.hu.hunspell.dictionaries/page1.xhp"), + u"/officecfg/registry/data/org/openoffice/Office/Accelerators.xcu" + }; + return !matchList(rUrl, list, SAL_N_ELEMENTS(list)); +} + +bool passesPositiveList(std::u16string_view rUrl) { + static const std::u16string_view list[] = { + u"/description.xml" + }; + return matchList(rUrl, list, SAL_N_ELEMENTS(list)); +} + +void handleCommand( + std::string_view rInPath, std::string_view rOutPath, + const std::string& rExecutable) +{ + OStringBuffer buf; + if (rExecutable == "uiex" || rExecutable == "hrcex") + { +#if !defined _WIN32 + // For now, this is only needed by some Linux ASan builds, so keep it simply and disable it + // on Windows (which doesn't support the relevant shell syntax for (un-)setting environment + // variables). 
+ auto const n = libraryPathEnvVarOverride.indexOf('='); + if (n == -1) { + buf.append("unset -v " + libraryPathEnvVarOverride + " && "); + } else { + buf.append(libraryPathEnvVarOverride + " "); + } +#endif + auto const env = getenv("SRC_ROOT"); + assert(env != nullptr); + buf.append(OString::Concat(env) + "/solenv/bin/"); + } + else + { +#if defined MACOSX + if (auto const env = getenv("DYLD_LIBRARY_PATH")) { + buf.append(OString::Concat("DYLD_LIBRARY_PATH=") + env + " "); + } +#endif + auto const env = getenv("WORKDIR_FOR_BUILD"); + assert(env != nullptr); + buf.append(OString::Concat(env) + "/LinkTarget/Executable/"); + } + buf.append(OString::Concat(std::string_view(rExecutable)) + + " -i " + rInPath + " -o " + rOutPath); + + if (system(buf.getStr()) != 0) + { + std::cerr << "Error: Failed to execute " << buf.getStr() << '\n'; + throw false; //TODO + } +} + +void InitPoFile( + std::string_view rProject, std::string_view rInPath, + std::string_view rPotDir, const OString& rOutPath ) +{ + //Create directory for po file + { + OUString outDir = + OStringToOUString( + rPotDir.substr(0,rPotDir.rfind('/')), RTL_TEXTENCODING_UTF8); + OUString outDirUrl; + if (osl::FileBase::getFileURLFromSystemPath(outDir, outDirUrl) + != osl::FileBase::E_None) + { + std::cerr + << ("Error: Cannot convert pathname to URL in " __FILE__ + ", in line ") + << __LINE__ << "\n outDir: " + << outDir + << "\n"; + throw false; //TODO + } + osl::Directory::createPath(outDirUrl); + } + + //Add header to the po file + PoOfstream aPoOutPut; + aPoOutPut.open(rOutPath); + if (!aPoOutPut.isOpen()) + { + std::cerr + << "Error: Cannot open po file " + << rOutPath << "\n"; + throw false; //TODO + } + + const size_t nProjectInd = rInPath.find(rProject); + const std::string_view relativPath = + rInPath.substr(nProjectInd, rInPath.rfind('/')- nProjectInd); + + PoHeader aTmp(relativPath); + aPoOutPut.writeHeader(aTmp); + aPoOutPut.close(); +} + +bool fileExists(const OString& fileName) +{ + FILE *f = 
fopen(fileName.getStr(), "r"); + + if (f != nullptr) + { + fclose(f); + return true; + } + + return false; +} + +OString gDestRoot; + +bool handleFile(std::string_view rProject, const OUString& rUrl, std::string_view rPotDir) +{ + struct Command { + std::u16string_view extension; + std::string executable; + bool positive; + }; + static Command const commands[] = { + { std::u16string_view(u".hrc"), "hrcex", false }, + { std::u16string_view(u".ulf"), "ulfex", false }, + { std::u16string_view(u".xcu"), "cfgex", false }, + { std::u16string_view(u".xrm"), "xrmex", false }, + { std::u16string_view(u"description.xml"), "xrmex", true }, + { std::u16string_view(u".xhp"), "helpex", false }, + { std::u16string_view(u".properties"), "propex", false }, + { std::u16string_view(u".ui"), "uiex", false }, + { std::u16string_view(u".tree"), "treex", false } }; + for (size_t i = 0; i != std::size(commands); ++i) + { + if (rUrl.endsWith(commands[i].extension) && + (commands[i].executable != "propex" || rUrl.indexOf("en_US") != -1)) + { + if (commands[i].positive ? 
passesPositiveList(rUrl) : passesNegativeList(rUrl)) + { + //Get input file path + OString sInPath; + { + OUString sInPathTmp; + if (osl::FileBase::getSystemPathFromFileURL(rUrl, sInPathTmp) != + osl::FileBase::E_None) + { + std::cerr << "osl::FileBase::getSystemPathFromFileURL(" << rUrl << ") failed\n"; + throw false; //TODO + } + sInPath = OUStringToOString( sInPathTmp, RTL_TEXTENCODING_UTF8 ); + } + OString sOutPath; + bool bCreatedFile = false; + bool bSimpleModuleCase = commands[i].executable == "uiex" || commands[i].executable == "hrcex"; + if (bSimpleModuleCase) + sOutPath = gDestRoot + "/" + rProject + "/messages.pot"; + else + sOutPath = OString::Concat(rPotDir) + ".pot"; + + if (!fileExists(sOutPath)) + { + InitPoFile(rProject, sInPath, rPotDir, sOutPath); + bCreatedFile = true; + } + handleCommand(sInPath, sOutPath, commands[i].executable); + + { + //Delete pot file if it contain only the header + PoIfstream aPOStream(sOutPath); + PoEntry aPO; + aPOStream.readEntry( aPO ); + bool bDel = aPOStream.eof(); + aPOStream.close(); + + if (bDel) + { + if ( system(OString("rm " + sOutPath).getStr()) != 0 ) + { + std::cerr + << "Error: Cannot remove entryless pot file: " + << sOutPath << "\n"; + throw false; //TODO + } + } + else if (bCreatedFile && bSimpleModuleCase) + { + // add one stock Add, Cancel, Close, Help, No, OK, Yes entry to each module.po + // and duplicates in .ui files then filtered out by solenv/bin/uiex + + std::ofstream aOutPut; + aOutPut.open(sOutPath.getStr(), std::ios_base::out | std::ios_base::app); + + aOutPut << "#. wH3TZ\nmsgctxt \"stock\"\nmsgid \"_Add\"\nmsgstr \"\"\n\n"; + aOutPut << "#. S9dsC\nmsgctxt \"stock\"\nmsgid \"_Apply\"\nmsgstr \"\"\n\n"; + aOutPut << "#. TMo6G\nmsgctxt \"stock\"\nmsgid \"_Cancel\"\nmsgstr \"\"\n\n"; + aOutPut << "#. MRCkv\nmsgctxt \"stock\"\nmsgid \"_Close\"\nmsgstr \"\"\n\n"; + aOutPut << "#. nvx5t\nmsgctxt \"stock\"\nmsgid \"_Delete\"\nmsgstr \"\"\n\n"; + aOutPut << "#. 
YspCj\nmsgctxt \"stock\"\nmsgid \"_Edit\"\nmsgstr \"\"\n\n"; + aOutPut << "#. imQxr\nmsgctxt \"stock\"\nmsgid \"_Help\"\nmsgstr \"\"\n\n"; + aOutPut << "#. RbjyB\nmsgctxt \"stock\"\nmsgid \"_New\"\nmsgstr \"\"\n\n"; + aOutPut << "#. dx2yy\nmsgctxt \"stock\"\nmsgid \"_No\"\nmsgstr \"\"\n\n"; + aOutPut << "#. M9DsL\nmsgctxt \"stock\"\nmsgid \"_OK\"\nmsgstr \"\"\n\n"; + aOutPut << "#. VtJS9\nmsgctxt \"stock\"\nmsgid \"_Remove\"\nmsgstr \"\"\n\n"; + aOutPut << "#. C69Fy\nmsgctxt \"stock\"\nmsgid \"_Reset\"\nmsgstr \"\"\n\n"; + aOutPut << "#. mgpxh\nmsgctxt \"stock\"\nmsgid \"_Yes\"\nmsgstr \"\"\n"; + + aOutPut.close(); + } + } + + + return true; + } + break; + } + } + return false; +} + +void handleFilesOfDir( + std::vector<OUString>& aFiles, std::string_view rProject, + std::string_view rPotDir ) +{ + ///Handle files in lexical order + std::sort(aFiles.begin(), aFiles.end()); + + for (auto const& elem : aFiles) + handleFile(rProject, elem, rPotDir); +} + +bool includeProject(std::string_view rProject) { + static const char *projects[] = { + "include", + "accessibility", + "avmedia", + "basctl", + "basic", + "chart2", + "connectivity", + "cui", + "dbaccess", + "desktop", + "dictionaries", + "editeng", + "extensions", + "extras", + "filter", + "forms", + "formula", + "fpicker", + "framework", + "helpcontent2", + "instsetoo_native", + "librelogo", + "mysqlc", + "nlpsolver", + "officecfg", + "oox", + "readlicense_oo", + "reportbuilder", + "reportdesign", + "sc", + "scaddins", + "sccomp", + "scp2", + "sd", + "sdext", + "setup_native", + "sfx2", + "shell", + "starmath", + "svl", + "svtools", + "svx", + "sw", + "swext", + "sysui", + "uui", + "vcl", + "wizards", + "writerperfect", + "xmlsecurity" }; + for (size_t i = 0; i != SAL_N_ELEMENTS(projects); ++i) { + if (rProject == projects[i]) { + return true; + } + } + return false; +} + +/// Handle one directory in the hierarchy. 
+/// +/// Ignores symlinks and instead explicitly descends into clone/* or src/*, +/// as the Cygwin symlinks are not supported by osl::Directory on Windows. +/// +/// @param rUrl the absolute file URL of this directory +/// +/// @param nLevel 0 if this is the root directory (core repository) +/// that contains the individual modules. 1 if it is a toplevel module and +/// larger values for the subdirectories. +/// +/// @param rProject the name of the project (empty and ignored if nLevel <= 0) +/// @param rPotDir the path of pot directory +void handleDirectory( + const OUString& rUrl, int nLevel, + const OString& rProject, const OString& rPotDir) +{ + osl::Directory dir(rUrl); + if (dir.open() != osl::FileBase::E_None) { + std::cerr + << "Error: Cannot open directory: " << rUrl << '\n'; + throw false; //TODO + } + std::vector<OUString> aFileNames; + std::map<OUString, std::map<OString, OString>> aSubDirs; + for (;;) { + osl::DirectoryItem item; + osl::FileBase::RC e = dir.getNextItem(item); + if (e == osl::FileBase::E_NOENT) { + break; + } + if (e != osl::FileBase::E_None) { + std::cerr << "Error: Cannot read directory\n"; + throw false; //TODO + } + osl::FileStatus stat( + osl_FileStatus_Mask_Type | osl_FileStatus_Mask_FileName + | osl_FileStatus_Mask_FileURL); + if (item.getFileStatus(stat) != osl::FileBase::E_None) { + std::cerr << "Error: Cannot get file status\n"; + throw false; //TODO + } + const OString sDirName = + OUStringToOString(stat.getFileName(),RTL_TEXTENCODING_UTF8); + switch (nLevel) + { + case 0: // a root directory + if (stat.getFileType() == osl::FileStatus::Directory && includeProject(sDirName)) + aSubDirs[stat.getFileURL()][sDirName] = rPotDir + "/" + sDirName; + break; + default: + if (stat.getFileType() == osl::FileStatus::Directory) + aSubDirs[stat.getFileURL()][rProject] = rPotDir + "/" + sDirName; + else + aFileNames.push_back(stat.getFileURL()); + break; + } + } + + OString aPotDir(rPotDir); + if( !aFileNames.empty() ) + { + OString 
aProject(rProject); + if (aProject == "include" && nLevel > 1) + { + aProject = aPotDir.copy(aPotDir.lastIndexOf('/') + 1); + aPotDir = aPotDir.subView(0, aPotDir.lastIndexOf("include")) + aProject + "/messages"; + } + if (aProject != "include") + { + handleFilesOfDir(aFileNames, aProject, aPotDir); + } + } + + if (dir.close() != osl::FileBase::E_None) { + std::cerr << "Error: Cannot close directory\n"; + throw false; //TODO + } + + for (auto const& elem : aSubDirs) + handleDirectory(elem.first, nLevel + 1, elem.second.begin()->first, + elem.second.begin()->second); + + //Remove empty pot directory + OUString sPoPath = + OStringToOUString( + aPotDir.subView(0,aPotDir.lastIndexOf('/')), RTL_TEXTENCODING_UTF8); + OUString sPoUrl; + if (osl::FileBase::getFileURLFromSystemPath(sPoPath, sPoUrl) + != osl::FileBase::E_None) + { + std::cerr + << ("Error: Cannot convert pathname to URL in " __FILE__ + ", in line ") + << __LINE__ << "\n" + << sPoPath + << "\n"; + throw false; //TODO + } + osl::Directory::remove(sPoUrl); +} + +void handleProjects(char const * sSourceRoot, char const * sDestRoot) +{ + OUString root16; + if (!rtl_convertStringToUString( + &root16.pData, sSourceRoot, rtl_str_getLength(sSourceRoot), + osl_getThreadTextEncoding(), + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR))) + { + std::cerr << "Error: Cannot convert pathname to UTF-16\n"; + throw false; //TODO + } + OUString rootUrl; + if (osl::FileBase::getFileURLFromSystemPath(root16, rootUrl) + != osl::FileBase::E_None) + { + std::cerr + << ("Error: Cannot convert pathname to URL in " __FILE__ + ", in line ") + << __LINE__ << "\n root16: " + << root16 + << "\n"; + throw false; //TODO + } + gDestRoot = OString(sDestRoot); + handleDirectory(rootUrl, 0, OString(), gDestRoot); +} +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + try + { + if (argc != 4) + { + std::cerr + << ("localize (c)2001 by Sun Microsystems\n\n" + "As 
part of the L10N framework, localize extracts en-US\n" + "strings for translation out of the toplevel modules defined\n" + "in projects array in l10ntools/source/localize.cxx.\n\n" + "Syntax: localize <source-root> <outfile> <library-path-env-var-override>\n"); + exit(EXIT_FAILURE); + } + libraryPathEnvVarOverride = argv[3]; + handleProjects(argv[1],argv[2]); + } + catch (std::exception& e) + { + std::cerr << "exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } + catch (bool) //TODO + { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/merge.cxx b/l10ntools/source/merge.cxx new file mode 100644 index 0000000000..88a39173aa --- /dev/null +++ b/l10ntools/source/merge.cxx @@ -0,0 +1,344 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> +#include <sal/log.hxx> + +#include <algorithm> +#include <cstdlib> +#include <fstream> +#include <iostream> +#include <string> +#include <utility> +#include <vector> + +#include <export.hxx> +#include <po.hxx> + +namespace +{ + OString lcl_NormalizeFilename(std::string_view rFilename) + { + size_t idx1 = rFilename.rfind( '\\' ); + size_t idx2 = rFilename.rfind( '/' ); + if (idx1 == std::string_view::npos && idx2 == std::string_view::npos) + return OString(rFilename); + if (idx1 == std::string_view::npos) + idx1 = 0; + if (idx2 == std::string_view::npos) + idx2 = 0; + return OString(rFilename.substr(std::max(idx1, idx2)+1)); + }; + + bool lcl_ReadPoChecked( + PoEntry& o_rPoEntry, PoIfstream& rPoFile, + const OString& rFileName) + { + try + { + rPoFile.readEntry( o_rPoEntry ); + } + catch (const PoIfstream::Exception&) + { + SAL_WARN("l10ntools", rFileName << " contains invalid entry"); + return false; + } + return true; + } +} + + + + +ResData::ResData( OString _sGId ) + : + sGId(std::move( _sGId )) +{ + sGId = sGId.replaceAll("\r"_ostr, OString()); +} + +ResData::ResData( OString _sGId, OString _sFilename) + : + sGId(std::move( _sGId )), + sFilename(std::move( _sFilename )) +{ + sGId = sGId.replaceAll("\r"_ostr, OString()); +} + + + + +bool MergeEntrys::GetText( OString &rReturn, + const OString &nLangIndex, bool bDel ) +{ + bool bReturn = true; + rReturn = sText[ nLangIndex ]; + if ( bDel ) + sText[ nLangIndex ] = ""_ostr; + bReturn = bTextFirst[ nLangIndex ]; + bTextFirst[ nLangIndex ] = false; + return bReturn; +} + +namespace +{ + OString GetDoubleBars() + { + //DOUBLE VERTICAL LINE instead of || because the translations make their + //way into action_names under gtk3 where || is illegal + return u8"\u2016"_ostr; + } +} + +OString MergeEntrys::GetQTZText(const ResData& rResData, std::string_view rOrigText) +{ + const OString sFilename = rResData.sFilename.copy(rResData.sFilename.lastIndexOf('/')+1); + const OString sKey = + 
PoEntry::genKeyId(sFilename + rResData.sGId + rResData.sId + rResData.sResTyp + rOrigText); + return sKey + GetDoubleBars() + rOrigText; +} + + + +MergeDataFile::MergeDataFile( + const OString &rFileName, std::string_view rFile, + bool bCaseSensitive, bool bWithQtz ) +{ + auto const env = getenv("ENABLE_RELEASE_BUILD"); + OString sEnableReleaseBuild(env == nullptr ? "" : env); + + std::ifstream aInputStream( rFileName.getStr() ); + if ( !aInputStream.is_open() ) + { + SAL_WARN("l10ntools", "Can't open po path container file for " << rFileName); + return; + } + std::string sPoFile; + aInputStream >> sPoFile; + bool bFirstLang = true; + while( !aInputStream.eof() ) + { + bool bSkipCurrentPOFile = false; + const OString sFileName( lcl_NormalizeFilename(rFile) ); + const bool bReadAll = sFileName.isEmpty(); + // coverity[tainted_data] - this is a build time tool + const OString sPoFileName(sPoFile.data(), static_cast<sal_Int32>(sPoFile.length())); + PoIfstream aPoInput; + aPoInput.open( sPoFileName ); + if ( !aPoInput.isOpen() ) + { + SAL_WARN("l10ntools", "Can't open file: " << sPoFileName); + return; + } + + OString sLang; + //Get language id from path + { + static constexpr OString sTransSource("translations/source/"_ostr); + const sal_Int32 nStart = + sPoFileName.indexOf(sTransSource)+sTransSource.getLength(); + const sal_Int32 nCount = + sPoFileName.indexOf('/',nStart) - nStart; + sLang = sPoFileName.copy(nStart,nCount); + } + aLanguageSet.insert( sLang ); + PoEntry aNextPo; + do + { + if( !lcl_ReadPoChecked(aNextPo, aPoInput, sPoFileName) ) + { + bSkipCurrentPOFile = true; + break; + } + } while( !aPoInput.eof() && aNextPo.getSourceFile() != sFileName && !bReadAll ); + while( !aPoInput.eof() && (aNextPo.getSourceFile() == sFileName || bReadAll ) && !bSkipCurrentPOFile ) + { + PoEntry aActPo( aNextPo ); + + bool bInSameComp = false; + OString sText; + OString sQHText; + OString sTitle; + OString sExText; + OString sExQHText; + OString sExTitle; + do + { + if( 
bInSameComp ) + aActPo = aNextPo; + OString sTemp = aActPo.getMsgStr(); + if( aActPo.isFuzzy() || sTemp.isEmpty() ) + sTemp = aActPo.getMsgId(); + switch( aActPo.getType() ) + { + case PoEntry::TTEXT: + sText = sTemp; + sExText = aActPo.getMsgId(); + break; + case PoEntry::TQUICKHELPTEXT: + sQHText = sTemp; + sExQHText = aActPo.getMsgId(); + break; + case PoEntry::TTITLE: + sTitle = sTemp; + sExTitle = aActPo.getMsgId(); + break; + } + if( !lcl_ReadPoChecked(aNextPo, aPoInput, sPoFileName) ) + { + bSkipCurrentPOFile = true; + break; + } + if (aPoInput.eof()) + break; + bInSameComp = PoEntry::IsInSameComp(aActPo, aNextPo); + } while( bInSameComp ); + + InsertEntry( + aActPo.getResourceType(), aActPo.getGroupId(), + aActPo.getLocalId(), sLang, sText, + sQHText, sTitle, aActPo.getSourceFile(), + bFirstLang, bCaseSensitive ); + + if( bFirstLang && bWithQtz && + sEnableReleaseBuild != "TRUE" ) + { + aLanguageSet.insert("qtz"_ostr); + InsertEntry( + aActPo.getResourceType(), aActPo.getGroupId(), + aActPo.getLocalId(), "qtz"_ostr, + sExText, sExQHText, + sExTitle, aActPo.getSourceFile(), + false, bCaseSensitive ); + } + } + aPoInput.close(); + aInputStream >> sPoFile; + bFirstLang = false; + } + aInputStream.close(); +} + +MergeDataFile::~MergeDataFile() +{ +} + +std::vector<OString> MergeDataFile::GetLanguages() const +{ + return std::vector<OString>(aLanguageSet.begin(),aLanguageSet.end()); +} + +MergeEntrys *MergeDataFile::GetMergeData( ResData *pResData , bool bCaseSensitive ) +{ + OString sOldG = pResData->sGId; + OString sOldL = pResData->sId; + OString sGID = pResData->sGId; + OString sLID; + if (sGID.isEmpty()) + sGID = pResData->sId; + else + sLID = pResData->sId; + pResData->sGId = sGID; + pResData->sId = sLID; + + OString sKey = CreateKey( pResData->sResTyp , pResData->sGId , pResData->sId , pResData->sFilename , bCaseSensitive ); + + auto mit = aMap.find( sKey ); + if(mit != aMap.end()) + { + pResData->sGId = sOldG; + pResData->sId = sOldL; + return 
mit->second.get(); + } + pResData->sGId = sOldG; + pResData->sId = sOldL; + return nullptr; +} + +MergeEntrys *MergeDataFile::GetMergeEntrys( ResData *pResData ) +{ + // search for requested MergeEntrys + return GetMergeData( pResData ); +} + +MergeEntrys *MergeDataFile::GetMergeEntrysCaseSensitive( ResData *pResData ) +{ + // search for requested MergeEntrys + return GetMergeData( pResData , true ); +} + +void MergeDataFile::InsertEntry( + std::string_view rTYP, std::string_view rGID, + std::string_view rLID, const OString &nLANG, + const OString &rTEXT, const OString &rQHTEXT, + const OString &rTITLE, std::string_view rInFilename, + bool bFirstLang, bool bCaseSensitive ) +{ + MergeEntrys *pMergeEntrys = nullptr; + + // search for MergeData + OString sKey = CreateKey(rTYP , rGID , rLID , rInFilename , bCaseSensitive); + + if( !bFirstLang ) + { + auto mit = aMap.find( sKey ); + if(mit != aMap.end()) + pMergeEntrys = mit->second.get(); + + } + + if( !pMergeEntrys ) + { + pMergeEntrys = new MergeEntrys; + if (!aMap.emplace( sKey, std::unique_ptr<MergeEntrys>(pMergeEntrys) ).second) + { + std::cerr << "Duplicate entry " << sKey << "\n"; + std::exit(EXIT_FAILURE); + } + } + + + // insert the cur string + if( nLANG =="qtz" ) + { + const OString sTemp = OString::Concat(rInFilename) + rGID + rLID + rTYP; + pMergeEntrys->InsertEntry( + nLANG, + rTEXT.isEmpty()? rTEXT : PoEntry::genKeyId(sTemp + rTEXT) + GetDoubleBars() + rTEXT, + rQHTEXT.isEmpty()? rQHTEXT : PoEntry::genKeyId(sTemp + rQHTEXT) + GetDoubleBars() + rQHTEXT, + rTITLE.isEmpty()? 
rTITLE : PoEntry::genKeyId(sTemp + rTITLE) + GetDoubleBars() + rTITLE ); + } + else + { + pMergeEntrys->InsertEntry( nLANG , rTEXT, rQHTEXT, rTITLE ); + } +} + +OString MergeDataFile::CreateKey(std::string_view rTYP, std::string_view rGID, + std::string_view rLID, std::string_view rFilename, bool bCaseSensitive) +{ + static const char sStroke[] = "-"; + OString sKey = OString::Concat(rTYP) + sStroke + rGID + sStroke + rLID + sStroke + + lcl_NormalizeFilename(rFilename); + if(bCaseSensitive) + return sKey; // officecfg case sensitive identifier + return sKey.toAsciiUpperCase(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/po.cxx b/l10ntools/source/po.cxx new file mode 100644 index 0000000000..fc0b5bbfbf --- /dev/null +++ b/l10ntools/source/po.cxx @@ -0,0 +1,644 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/crc.h> +#include <sal/log.hxx> + +#include <cstring> +#include <ctime> +#include <cassert> + +#include <vector> +#include <string> +#include <string_view> + +#include <po.hxx> +#include <helper.hxx> + +/** Container of po entry + + Provide all file operations related to LibreOffice specific + po entry and store it's attributes. 
+*/ +class GenPoEntry +{ +private: + OStringBuffer m_sExtractCom; + std::vector<OString> m_sReferences; + OString m_sMsgCtxt; + OString m_sMsgId; + OString m_sMsgIdPlural; + OString m_sMsgStr; + std::vector<OString> m_sMsgStrPlural; + bool m_bFuzzy; + bool m_bCFormat; + bool m_bNull; + +public: + GenPoEntry(); + + const std::vector<OString>& getReference() const { return m_sReferences; } + const OString& getMsgCtxt() const { return m_sMsgCtxt; } + const OString& getMsgId() const { return m_sMsgId; } + const OString& getMsgStr() const { return m_sMsgStr; } + bool isFuzzy() const { return m_bFuzzy; } + bool isNull() const { return m_bNull; } + + void setExtractCom(std::string_view rExtractCom) + { + m_sExtractCom = rExtractCom; + } + void setReference(const OString& rReference) + { + m_sReferences.push_back(rReference); + } + void setMsgCtxt(const OString& rMsgCtxt) + { + m_sMsgCtxt = rMsgCtxt; + } + void setMsgId(const OString& rMsgId) + { + m_sMsgId = rMsgId; + } + void setMsgStr(const OString& rMsgStr) + { + m_sMsgStr = rMsgStr; + } + + void writeToFile(std::ofstream& rOFStream) const; + void readFromFile(std::ifstream& rIFStream); +}; + +namespace +{ + // Convert a normal string to msg/po output string + OString lcl_GenMsgString(std::string_view rString) + { + if ( rString.empty() ) + return "\"\""_ostr; + + OString sResult = + "\"" + + helper::escapeAll(rString,"\n""\t""\r""\\""\"","\\n""\\t""\\r""\\\\""\\\"") + + "\""; + sal_Int32 nIndex = 0; + while((nIndex=sResult.indexOf("\\n",nIndex))!=-1) + { + if( !sResult.match("\\\\n", nIndex-1) && + nIndex!=sResult.getLength()-3) + { + sResult = sResult.replaceAt(nIndex,2,"\\n\"\n\""); + } + ++nIndex; + } + + if ( sResult.indexOf('\n') != -1 ) + return "\"\"\n" + sResult; + + return sResult; + } + + // Convert msg string to normal form + OString lcl_GenNormString(std::string_view rString) + { + return + helper::unEscapeAll( + rString.substr(1,rString.size()-2), + "\\n""\\t""\\r""\\\\""\\\"", + "\n""\t""\r""\\""\""); + 
} +} + +GenPoEntry::GenPoEntry() + : m_bFuzzy( false ) + , m_bCFormat( false ) + , m_bNull( false ) +{ +} + +void GenPoEntry::writeToFile(std::ofstream& rOFStream) const +{ + if ( rOFStream.tellp() != std::ofstream::pos_type( 0 )) + rOFStream << std::endl; + if ( !m_sExtractCom.isEmpty() ) + rOFStream + << "#. " + << m_sExtractCom.toString().replaceAll("\n"_ostr,"\n#. "_ostr) << std::endl; + for(const auto& rReference : m_sReferences) + rOFStream << "#: " << rReference << std::endl; + if ( m_bFuzzy ) + rOFStream << "#, fuzzy" << std::endl; + if ( m_bCFormat ) + rOFStream << "#, c-format" << std::endl; + if ( !m_sMsgCtxt.isEmpty() ) + rOFStream << "msgctxt " + << lcl_GenMsgString(m_sMsgCtxt) + << std::endl; + rOFStream << "msgid " + << lcl_GenMsgString(m_sMsgId) << std::endl; + if ( !m_sMsgIdPlural.isEmpty() ) + rOFStream << "msgid_plural " + << lcl_GenMsgString(m_sMsgIdPlural) + << std::endl; + if ( !m_sMsgStrPlural.empty() ) + for(auto & line : m_sMsgStrPlural) + rOFStream << line.copy(0,10) << lcl_GenMsgString(line.subView(10)) << std::endl; + else + rOFStream << "msgstr " + << lcl_GenMsgString(m_sMsgStr) << std::endl; +} + +void GenPoEntry::readFromFile(std::ifstream& rIFStream) +{ + *this = GenPoEntry(); + OString* pLastMsg = nullptr; + std::string sTemp; + getline(rIFStream,sTemp); + if( rIFStream.eof() || sTemp.empty() ) + { + m_bNull = true; + return; + } + while(!rIFStream.eof()) + { + OString sLine(sTemp.data(),sTemp.length()); + if (sLine.startsWith("#. 
")) + { + if( !m_sExtractCom.isEmpty() ) + { + m_sExtractCom.append("\n"); + } + m_sExtractCom.append(sLine.subView(3)); + } + else if (sLine.startsWith("#: ")) + { + m_sReferences.push_back(sLine.copy(3)); + } + else if (sLine.startsWith("#, fuzzy")) + { + m_bFuzzy = true; + } + else if (sLine.startsWith("#, c-format")) + { + m_bCFormat = true; + } + else if (sLine.startsWith("msgctxt ")) + { + m_sMsgCtxt = lcl_GenNormString(sLine.subView(8)); + pLastMsg = &m_sMsgCtxt; + } + else if (sLine.startsWith("msgid ")) + { + m_sMsgId = lcl_GenNormString(sLine.subView(6)); + pLastMsg = &m_sMsgId; + } + else if (sLine.startsWith("msgid_plural ")) + { + m_sMsgIdPlural = lcl_GenNormString(sLine.subView(13)); + pLastMsg = &m_sMsgIdPlural; + } + else if (sLine.startsWith("msgstr ")) + { + m_sMsgStr = lcl_GenNormString(sLine.subView(7)); + pLastMsg = &m_sMsgStr; + } + else if (sLine.startsWith("msgstr[")) + { + // assume there are no more than 10 plural forms... + // and that plural strings are never split to multi-line in po + m_sMsgStrPlural.push_back(sLine.subView(0,10) + lcl_GenNormString(sLine.subView(10))); + } + else if (sLine.startsWith("\"") && pLastMsg) + { + OString sReference; + if (!m_sReferences.empty()) + { + sReference = m_sReferences.front(); + } + if (pLastMsg != &m_sMsgCtxt || sLine != Concat2View("\"" + sReference + "\\n\"")) + { + *pLastMsg += lcl_GenNormString(sLine); + } + } + else + break; + getline(rIFStream,sTemp); + } + } + +PoEntry::PoEntry() + : m_bIsInitialized( false ) +{ +} + +PoEntry::PoEntry( + std::string_view rSourceFile, std::string_view rResType, std::string_view rGroupId, + std::string_view rLocalId, std::string_view rHelpText, + const OString& rText, const TYPE eType ) + : m_bIsInitialized( false ) +{ + if( rSourceFile.empty() ) + throw NOSOURCFILE; + else if ( rResType.empty() ) + throw NORESTYPE; + else if ( rGroupId.empty() ) + throw NOGROUPID; + else if ( rText.isEmpty() ) + throw NOSTRING; + else if ( rHelpText.size() == 5 ) + throw 
WRONGHELPTEXT; + + m_pGenPo.reset( new GenPoEntry() ); + size_t idx = rSourceFile.rfind('/'); + if (idx == std::string_view::npos) + idx = 0; + OString sReference(rSourceFile.substr(idx+1)); + m_pGenPo->setReference(sReference); + + OString sMsgCtxt = + sReference + "\n" + + rGroupId + "\n" + + (rLocalId.empty() ? OString() : OString::Concat(rLocalId) + "\n") + + rResType; + switch(eType){ + case TTEXT: + sMsgCtxt += ".text"; break; + case TQUICKHELPTEXT: + sMsgCtxt += ".quickhelptext"; break; + case TTITLE: + sMsgCtxt += ".title"; break; + // Default case is unneeded because the type of eType has only three element + } + m_pGenPo->setMsgCtxt(sMsgCtxt); + m_pGenPo->setMsgId(rText); + m_pGenPo->setExtractCom(Concat2View( + ( !rHelpText.empty() ? OString::Concat(rHelpText) + "\n" : OString()) + + genKeyId( m_pGenPo->getReference().front() + rGroupId + rLocalId + rResType + rText ) )); + m_bIsInitialized = true; +} + +PoEntry::~PoEntry() +{ +} + +PoEntry::PoEntry( const PoEntry& rPo ) + : m_pGenPo( rPo.m_pGenPo ? 
new GenPoEntry( *(rPo.m_pGenPo) ) : nullptr ) + , m_bIsInitialized( rPo.m_bIsInitialized ) +{ +} + +PoEntry& PoEntry::operator=(const PoEntry& rPo) +{ + if( this == &rPo ) + { + return *this; + } + if( rPo.m_pGenPo ) + { + if( m_pGenPo ) + { + *m_pGenPo = *(rPo.m_pGenPo); + } + else + { + m_pGenPo.reset( new GenPoEntry( *(rPo.m_pGenPo) ) ); + } + } + else + { + m_pGenPo.reset(); + } + m_bIsInitialized = rPo.m_bIsInitialized; + return *this; +} + +PoEntry& PoEntry::operator=(PoEntry&& rPo) noexcept +{ + m_pGenPo = std::move(rPo.m_pGenPo); + m_bIsInitialized = std::move(rPo.m_bIsInitialized); + return *this; +} + +OString const & PoEntry::getSourceFile() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getReference().front(); +} + +OString PoEntry::getGroupId() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgCtxt().getToken(0,'\n'); +} + +OString PoEntry::getLocalId() const +{ + assert( m_bIsInitialized ); + const OString sMsgCtxt = m_pGenPo->getMsgCtxt(); + if (sMsgCtxt.indexOf('\n')==sMsgCtxt.lastIndexOf('\n')) + return OString(); + else + return sMsgCtxt.getToken(1,'\n'); +} + +OString PoEntry::getResourceType() const +{ + assert( m_bIsInitialized ); + const OString sMsgCtxt = m_pGenPo->getMsgCtxt(); + if (sMsgCtxt.indexOf('\n')==sMsgCtxt.lastIndexOf('\n')) + return sMsgCtxt.getToken(1,'\n').getToken(0,'.'); + else + return sMsgCtxt.getToken(2,'\n').getToken(0,'.'); +} + +PoEntry::TYPE PoEntry::getType() const +{ + assert( m_bIsInitialized ); + const OString sMsgCtxt = m_pGenPo->getMsgCtxt(); + const OString sType = sMsgCtxt.copy( sMsgCtxt.lastIndexOf('.') + 1 ); + assert( + (sType == "text" || sType == "quickhelptext" || sType == "title") ); + if ( sType == "text" ) + return TTEXT; + else if ( sType == "quickhelptext" ) + return TQUICKHELPTEXT; + else + return TTITLE; +} + +bool PoEntry::isFuzzy() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->isFuzzy(); +} + +// Get message context +const OString& PoEntry::getMsgCtxt() const 
+{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgCtxt(); + +} + +// Get translation string in merge format +OString const & PoEntry::getMsgId() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgId(); +} + +// Get translated string in merge format +const OString& PoEntry::getMsgStr() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgStr(); + +} + +bool PoEntry::IsInSameComp(const PoEntry& rPo1,const PoEntry& rPo2) +{ + assert( rPo1.m_bIsInitialized && rPo2.m_bIsInitialized ); + return ( rPo1.getSourceFile() == rPo2.getSourceFile() && + rPo1.getGroupId() == rPo2.getGroupId() && + rPo1.getLocalId() == rPo2.getLocalId() && + rPo1.getResourceType() == rPo2.getResourceType() ); +} + +OString PoEntry::genKeyId(const OString& rGenerator) +{ + sal_uInt32 nCRC = rtl_crc32(0, rGenerator.getStr(), rGenerator.getLength()); + // Use simple ASCII characters, exclude I, l, 1 and O, 0 to avoid confusing IDs + static const char sSymbols[] = + "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz23456789"; + char sKeyId[6]; + for( short nKeyInd = 0; nKeyInd < 5; ++nKeyInd ) + { + sKeyId[nKeyInd] = sSymbols[(nCRC & 63) % strlen(sSymbols)]; + nCRC >>= 6; + } + sKeyId[5] = '\0'; + return sKeyId; +} + +namespace +{ + // Get actual time in "YEAR-MO-DA HO:MI+ZONE" form + OString lcl_GetTime() + { + time_t aNow = time(nullptr); + struct tm* pNow = localtime(&aNow); + char pBuff[50]; + strftime( pBuff, sizeof pBuff, "%Y-%m-%d %H:%M%z", pNow ); + return pBuff; + } +} + +// when updating existing files (pocheck), reuse provided po-header +PoHeader::PoHeader( std::string_view rExtSrc, const OString& rPoHeaderMsgStr ) + : m_pGenPo( new GenPoEntry() ) + , m_bIsInitialized( false ) +{ + m_pGenPo->setExtractCom(Concat2View(OString::Concat("extracted from ") + rExtSrc)); + m_pGenPo->setMsgStr(rPoHeaderMsgStr); + m_bIsInitialized = true; +} + +PoHeader::PoHeader( std::string_view rExtSrc ) + : m_pGenPo( new GenPoEntry() ) + , m_bIsInitialized( false ) +{ + 
m_pGenPo->setExtractCom(Concat2View(OString::Concat("extracted from ") + rExtSrc)); + m_pGenPo->setMsgStr( + "Project-Id-Version: PACKAGE VERSION\n" + "Report-Msgid-Bugs-To: https://bugs.libreoffice.org/enter_bug.cgi?" + "product=LibreOffice&bug_status=UNCONFIRMED&component=UI\n" + "POT-Creation-Date: " + lcl_GetTime() + + "\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" + "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" + "Language-Team: LANGUAGE <LL@li.org>\n" + "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=UTF-8\n" + "Content-Transfer-Encoding: 8bit\n" + "X-Accelerator-Marker: ~\n" + "X-Generator: LibreOffice\n"); + m_bIsInitialized = true; +} + +PoHeader::~PoHeader() +{ +} + +PoOfstream::PoOfstream() + : m_bIsAfterHeader( false ) +{ +} + +PoOfstream::PoOfstream(const OString& rFileName, OpenMode aMode ) + : m_bIsAfterHeader( false ) +{ + open( rFileName, aMode ); +} + +PoOfstream::~PoOfstream() +{ + if( isOpen() ) + { + close(); + } +} + +void PoOfstream::open(const OString& rFileName, OpenMode aMode ) +{ + assert( !isOpen() ); + if( aMode == TRUNC ) + { + m_aOutPut.open( rFileName.getStr(), + std::ios_base::out | std::ios_base::trunc ); + m_bIsAfterHeader = false; + } + else if( aMode == APP ) + { + m_aOutPut.open( rFileName.getStr(), + std::ios_base::out | std::ios_base::app ); + m_bIsAfterHeader = m_aOutPut.tellp() != std::ofstream::pos_type( 0 ); + } +} + +void PoOfstream::close() +{ + assert( isOpen() ); + m_aOutPut.close(); +} + +void PoOfstream::writeHeader(const PoHeader& rPoHeader) +{ + assert( isOpen() && !m_bIsAfterHeader && rPoHeader.m_bIsInitialized ); + rPoHeader.m_pGenPo->writeToFile( m_aOutPut ); + m_bIsAfterHeader = true; +} + +void PoOfstream::writeEntry( const PoEntry& rPoEntry ) +{ + assert( isOpen() && m_bIsAfterHeader && rPoEntry.m_bIsInitialized ); + rPoEntry.m_pGenPo->writeToFile( m_aOutPut ); +} + +namespace +{ + +// Check the validity of read entry +bool lcl_CheckInputEntry(const GenPoEntry& rEntry) +{ + // stock button labels 
don't have a reference/sourcefile - they are not extracted from ui files + // (explicitly skipped by solenv/bin/uiex) but instead inserted by l10ntools/source/localize.cxx + // into all module templates (see d5d905b480c2a9b1db982f2867e87b5c230d1ab9) + return !rEntry.getMsgCtxt().isEmpty() && + (rEntry.getMsgCtxt() == "stock" || !rEntry.getReference().empty()) && + !rEntry.getMsgId().isEmpty(); +} + +} + +PoIfstream::PoIfstream() + : m_bEof( false ) +{ +} + +PoIfstream::PoIfstream(const OString& rFileName) + : m_bEof( false ) +{ + open( rFileName ); +} + +PoIfstream::~PoIfstream() +{ + if( isOpen() ) + { + close(); + } +} + +void PoIfstream::open( const OString& rFileName, OString& rPoHeader ) +{ + assert( !isOpen() ); + m_aInPut.open( rFileName.getStr(), std::ios_base::in ); + + // capture header, updating timestamp and generator + std::string sTemp; + std::getline(m_aInPut,sTemp); + while( !sTemp.empty() && !m_aInPut.eof() ) + { + std::getline(m_aInPut,sTemp); + OString sLine(sTemp.data(),sTemp.length()); + if (sLine.startsWith("\"PO-Revision-Date")) + rPoHeader += "PO-Revision-Date: " + lcl_GetTime() + "\n"; + else if (sLine.startsWith("\"X-Generator")) + rPoHeader += "X-Generator: LibreOffice\n"; + else if (sLine.startsWith("\"")) + rPoHeader += lcl_GenNormString(sLine); + } + m_bEof = false; +} + +void PoIfstream::open( const OString& rFileName ) +{ + assert( !isOpen() ); + m_aInPut.open( rFileName.getStr(), std::ios_base::in ); + + // Skip header + std::string sTemp; + std::getline(m_aInPut,sTemp); + while( !sTemp.empty() && !m_aInPut.eof() ) + { + std::getline(m_aInPut,sTemp); + } + m_bEof = false; +} + +void PoIfstream::close() +{ + assert( isOpen() ); + m_aInPut.close(); +} + +void PoIfstream::readEntry( PoEntry& rPoEntry ) +{ + assert( isOpen() && !eof() ); + GenPoEntry aGenPo; + aGenPo.readFromFile( m_aInPut ); + if( aGenPo.isNull() ) + { + m_bEof = true; + rPoEntry = PoEntry(); + } + else + { + if( lcl_CheckInputEntry(aGenPo) ) + { + if( 
rPoEntry.m_pGenPo ) + { + *(rPoEntry.m_pGenPo) = aGenPo; + } + else + { + rPoEntry.m_pGenPo.reset( new GenPoEntry( aGenPo ) ); + } + rPoEntry.m_bIsInitialized = true; + } + else + { + SAL_WARN("l10ntools", "Parse problem with entry: " << aGenPo.getMsgStr()); + throw PoIfstream::Exception(); + } + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/pocheck.cxx b/l10ntools/source/pocheck.cxx new file mode 100644 index 0000000000..0dcb2d0dce --- /dev/null +++ b/l10ntools/source/pocheck.cxx @@ -0,0 +1,430 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/config.h> + +#include <cassert> +#include <iostream> +#include <map> +#include <vector> +#include <rtl/string.hxx> +#include <rtl/ustring.hxx> +#include <osl/file.hxx> +#include <po.hxx> + +// Translated style names must be unique +static void checkStyleNames(const OString& aLanguage) +{ + std::map<OString,sal_uInt16> aLocalizedStyleNames; + std::map<OString,sal_uInt16> aLocalizedNumStyleNames; + std::vector<PoEntry> repeatedEntries; + + OString aPoPath = OString::Concat(getenv("SRC_ROOT")) + + "/translations/source/" + + aLanguage + "/sw/messages.po"; + PoIfstream aPoInput; + aPoInput.open(aPoPath); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + bool bRepeated = false; + if( aPoInput.eof() ) + { + break; + } + + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt().startsWith("STR_POOLCOLL") ) + { + const OString& aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedStyleNames.find(aMsgStr) == 
aLocalizedStyleNames.end() ) + aLocalizedStyleNames[aMsgStr] = 1; + else { + aLocalizedStyleNames[aMsgStr]++; + bRepeated = true; + } + } + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt().startsWith("STR_POOLNUMRULE") ) + { + const OString& aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedNumStyleNames.find(aMsgStr) == aLocalizedNumStyleNames.end() ) + aLocalizedNumStyleNames[aMsgStr] = 1; + else { + aLocalizedNumStyleNames[aMsgStr]++; + bRepeated = true; + } + } + if (bRepeated) + repeatedEntries.push_back(aPoEntry); + } + aPoInput.close(); + + for (auto const& localizedStyleName : aLocalizedStyleNames) + { + if( localizedStyleName.second > 1 ) + { + std::cout << "ERROR: Style name translations must be unique in:\n" << + aPoPath << "\nLanguage: " << aLanguage << "\nDuplicated translation is: " << localizedStyleName.first << + "\nSee STR_POOLCOLL_*\n\n"; + } + } + for (auto const& localizedNumStyleName : aLocalizedNumStyleNames) + { + if( localizedNumStyleName.second > 1 ) + { + std::cout << "ERROR: Style name translations must be unique in:\n" << + aPoPath << "\nLanguage: " << aLanguage << "\nDuplicated translation is: " << localizedNumStyleName.first << + "\nSee STR_POOLNUMRULE_*\n\n"; + } + } + OString sPoHdrMsg; + aPoInput.open(aPoPath, sPoHdrMsg); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + PoOfstream aPoOutput; + aPoOutput.open(aPoPath+".new"); + PoHeader aTmp("sw/inc", sPoHdrMsg); + aPoOutput.writeHeader(aTmp); + bool bAnyError = false; + + for(;;) + { + PoEntry aPoEntry; + bool bError = false; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + for (auto const& repeatedEntry : repeatedEntries) + { + if (repeatedEntry.getMsgId() == aPoEntry.getMsgId() && repeatedEntry.getMsgCtxt() == aPoEntry.getMsgCtxt()) { + bError = true; + break; + } + } + if (bError) { + bAnyError = true; + } else { + aPoOutput.writeEntry(aPoEntry); + } + } + 
aPoInput.close(); + aPoOutput.close(); + OUString aPoPathURL; + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPath, RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bAnyError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); +} + +// Translated spreadsheet function names must be unique +static void checkFunctionNames(const OString& aLanguage) +{ + std::map<OString,sal_uInt16> aLocalizedFunctionNames; + std::map<OString,sal_uInt16> aLocalizedCoreFunctionNames; + + std::vector<PoEntry> repeatedEntries; + + OString aPoPaths[2]; + OUString aPoPathURL; + + aPoPaths[0] = OString::Concat(getenv("SRC_ROOT")) + + "/translations/source/" + + aLanguage + + "/formula/messages.po"; + PoIfstream aPoInput; + OString sPoHdrMsg; + aPoInput.open(aPoPaths[0], sPoHdrMsg); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPaths[0] << std::endl; + return; + } + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt() == "RID_STRLIST_FUNCTION_NAMES" ) + { + const OString& aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedCoreFunctionNames.find(aMsgStr) == aLocalizedCoreFunctionNames.end() ) + aLocalizedCoreFunctionNames[aMsgStr] = 1; + if( aLocalizedFunctionNames.find(aMsgStr) == aLocalizedFunctionNames.end() ) { + aLocalizedFunctionNames[aMsgStr] = 1; + } else { + aLocalizedFunctionNames[aMsgStr]++; + repeatedEntries.push_back(aPoEntry); + } + } + } + aPoInput.close(); + + aPoPaths[1] = OString::Concat(getenv("SRC_ROOT")) + + "/translations/source/" + + aLanguage + + "/scaddins/messages.po"; + aPoInput.open(aPoPaths[1]); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPaths[1] << std::endl; + return; + } + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && 
aPoEntry.getMsgCtxt().startsWith("ANALYSIS_FUNCNAME") ) + { + OString aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedCoreFunctionNames.find(aMsgStr) != aLocalizedCoreFunctionNames.end() ) + aMsgStr += "_ADD"; + if( aLocalizedFunctionNames.find(aMsgStr) == aLocalizedFunctionNames.end() ) { + aLocalizedFunctionNames[aMsgStr] = 1; + } else { + aLocalizedFunctionNames[aMsgStr]++; + repeatedEntries.push_back(aPoEntry); + } + } + } + aPoInput.close(); + + for (auto const& localizedFunctionName : aLocalizedFunctionNames) + { + if( localizedFunctionName.second > 1 ) + { + std::cout + << ("ERROR: Spreadsheet function name translations must be" + " unique.\nLanguage: ") + << aLanguage << "\nDuplicated translation is: " << localizedFunctionName.first + << "\n\n"; + } + } + + for (int i=0;i<2;i++) + { + aPoInput.open(aPoPaths[i]); + if( !aPoInput.isOpen() ) + std::cerr << "Warning: Cannot open " << aPoPaths[i] << std::endl; + PoOfstream aPoOutput; + aPoOutput.open(aPoPaths[i]+".new"); + + switch (i) + { + case 0: + { + PoHeader hd("formula/inc", sPoHdrMsg); + aPoOutput.writeHeader(hd); + break; + } + case 1: + { + PoHeader hd("scaddins/inc", sPoHdrMsg); + aPoOutput.writeHeader(hd); + break; + } + } + bool bAnyError = false; + + for(;;) + { + PoEntry aPoEntry; + bool bError = false; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + for (auto const& repeatedEntry : repeatedEntries) + { + if (repeatedEntry.getMsgId() == aPoEntry.getMsgId() && repeatedEntry.getMsgCtxt() == aPoEntry.getMsgCtxt()) + { + bError = true; + break; + } + } + if (bError) + { + bAnyError = true; + } + else + { + aPoOutput.writeEntry(aPoEntry); + } + } + aPoInput.close(); + aPoOutput.close(); + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPaths[i], RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bAnyError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); + } +} + +// In 
instsetoo_native/inc_openoffice/windows/msi_languages.po +// where an en-US string ends with '|', translation must end +// with '|', too. +static void checkVerticalBar(const OString& aLanguage) +{ + OString aPoPath = OString::Concat(getenv("SRC_ROOT")) + + "/translations/source/" + + aLanguage + + "/instsetoo_native/inc_openoffice/windows/msi_languages.po"; + PoIfstream aPoInput; + aPoInput.open(aPoPath); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + PoOfstream aPoOutput; + aPoOutput.open(aPoPath+".new"); + PoHeader aTmp("instsetoo_native/inc_openoffice/windows/msi_languages"); + aPoOutput.writeHeader(aTmp); + bool bError = false; + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgId().endsWith("|") && + !aPoEntry.getMsgStr().isEmpty() && !aPoEntry.getMsgStr().endsWith("|") ) + { + std::cout + << ("ERROR: Missing '|' character at the end of translated" + " string.\nIt causes runtime error in installer.\nFile: ") + << aPoPath << std::endl + << "Language: " << aLanguage << std::endl + << "English: " << aPoEntry.getMsgId() << std::endl + << "Localized: " << aPoEntry.getMsgStr() << std::endl + << std::endl; + bError = true; + } + else + aPoOutput.writeEntry(aPoEntry); + } + aPoInput.close(); + aPoOutput.close(); + OUString aPoPathURL; + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPath, RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); +} + +// In starmath/source.po Math symbol names (from symbol.src) +// must not contain spaces +static void checkMathSymbolNames(const OString& aLanguage) +{ + OString aPoPath = OString::Concat(getenv("SRC_ROOT")) + + "/translations/source/" + + aLanguage + + "/starmath/messages.po"; + PoIfstream aPoInput; + aPoInput.open(aPoPath); + if( !aPoInput.isOpen() ) + { + 
std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + PoOfstream aPoOutput; + aPoOutput.open(aPoPath+".new"); + PoHeader aTmp("starmath/inc"); + aPoOutput.writeHeader(aTmp); + bool bError = false; + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getGroupId() == "RID_UI_SYMBOL_NAMES" && + !aPoEntry.getMsgStr().isEmpty() && (aPoEntry.getMsgStr().indexOf(" ") != -1) ) + { + std::cout + << "ERROR: Math symbol names must not contain spaces.\nFile: " + << aPoPath << std::endl + << "Language: " << aLanguage << std::endl + << "English: " << aPoEntry.getMsgId() << std::endl + << "Localized: " << aPoEntry.getMsgStr() << std::endl + << std::endl; + bError = true; + } + else + aPoOutput.writeEntry(aPoEntry); + } + aPoInput.close(); + aPoOutput.close(); + OUString aPoPathURL; + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPath, RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); +} + +int main() +{ + try + { + auto const env = getenv("ALL_LANGS"); + assert(env != nullptr); + OString aLanguages(env); + if( aLanguages.isEmpty() ) + { + std::cerr << "Usage: LD_LIBRARY_PATH=instdir/program make cmd cmd=workdir/LinkTarget/Executable/pocheck\n"; + return 1; + } + for(sal_Int32 i = 1;;++i) // skip en-US + { + OString aLanguage = aLanguages.getToken(i,' '); + if( aLanguage.isEmpty() ) + break; + if( aLanguage == "qtz" ) + continue; + checkStyleNames(aLanguage); + checkFunctionNames(aLanguage); + checkVerticalBar(aLanguage); + checkMathSymbolNames(aLanguage); + } + return 0; + } + catch (std::exception& e) + { + std::cerr << "pocheck: exception " << e.what() << std::endl; + return 1; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/propex.cxx b/l10ntools/source/propex.cxx new file mode 100644 index 
0000000000..9d62dd6884 --- /dev/null +++ b/l10ntools/source/propex.cxx @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/main.h> + +#include <common.hxx> +#include <propmerge.hxx> + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + common::HandledArgs aArgs; + if( !common::handleArguments(argc, argv, aArgs) ) + { + common::writeUsage("propex"_ostr,"*.properties"_ostr); + return 1; + } + + PropParser aParser( + aArgs.m_sInputFile, aArgs.m_sLanguage, aArgs.m_bMergeMode ); + if( !aParser.isInitialized() ) + { + return 1; + } + if( aArgs.m_bMergeMode ) + { + aParser.Merge( aArgs.m_sMergeSrc, aArgs.m_sOutputFile ); + } + else + { + aParser.Extract( aArgs.m_sOutputFile ); + } + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/propmerge.cxx b/l10ntools/source/propmerge.cxx new file mode 100644 index 0000000000..e17b2dd9c1 --- /dev/null +++ b/l10ntools/source/propmerge.cxx @@ -0,0 +1,232 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/** Find the first ascii escaped unicode sequence ("\uXXXX" with four hex
    digits) in rSource, starting the search at nFrom.

    @param rSource text to search
    @param nFrom   index to start at; a negative value finds nothing
    @return index of the backslash of the first complete escape, or -1

    A "\u" that is not followed by four hex digits is skipped and the search
    continues behind it. A "\u" closer than six characters to the end of the
    text can never be a complete escape, so it is rejected without reading
    past the end of the view.
*/
sal_Int32 lcl_IndexOfUnicode(
    std::string_view rSource, const sal_Int32 nFrom = 0 )
{
    static constexpr std::string_view sHexDigits = "0123456789abcdefABCDEF";
    constexpr std::size_t nEscapeLength = 6; // backslash, 'u', four digits

    if( nFrom < 0 )
    {
        return -1;
    }
    std::size_t nIndex = rSource.find( "\\u", static_cast<std::size_t>( nFrom ) );
    while( nIndex != std::string_view::npos &&
           nIndex + nEscapeLength <= rSource.size() )
    {
        bool bIsUnicode = true;
        for( std::size_t nDist = 2; nDist < nEscapeLength; ++nDist )
        {
            if( sHexDigits.find( rSource[nIndex + nDist] ) == std::string_view::npos )
            {
                bIsUnicode = false;
                break;
            }
        }
        if( bIsUnicode )
        {
            return static_cast<sal_Int32>( nIndex );
        }
        // Not an escape after all: look for the next candidate.
        nIndex = rSource.find( "\\u", nIndex + 2 );
    }
    return -1;
}
(cUniCode & 0xFF); + } + } + } +} + +//Open source file and store its lines +PropParser::PropParser( + OString _sInputFile, OString _sLang, + const bool bMergeMode ) + : m_sSource(std::move( _sInputFile )) + , m_sLang(std::move( _sLang )) + , m_bIsInitialized( false ) +{ + std::ifstream aIfstream( m_sSource.getStr() ); + if( aIfstream.is_open() ) + { + std::string s; + std::getline( aIfstream, s ); + while( !aIfstream.eof() ) + { + OString sLine( s.data(), s.length() ); + if( bMergeMode || + ( !sLine.startsWith(" *") && !sLine.startsWith("/*") ) ) + { + m_vLines.push_back( sLine ); + } + std::getline( aIfstream, s ); + } + } + else + { + std::cerr + << "Propex error: Cannot open source file: " + << m_sSource << std::endl; + return; + } + m_bIsInitialized = true; +} + +PropParser::~PropParser() +{ +} + +//Extract strings form source file +void PropParser::Extract( const OString& rPOFile ) +{ + assert( m_bIsInitialized ); + PoOfstream aPOStream( rPOFile, PoOfstream::APP ); + if( !aPOStream.isOpen() ) + { + std::cerr + << "Propex error: Cannot open pofile for extract: " + << rPOFile << std::endl; + return; + } + + for( size_t nIndex = 0; nIndex < m_vLines.size(); ++nIndex ) + { + const OString sLine = m_vLines[nIndex]; + const sal_Int32 nEqualSign = sLine.indexOf('='); + if( nEqualSign != -1 ) + { + std::string_view sID = o3tl::trim(sLine.subView( 0, nEqualSign )); + OString sText = lcl_ConvertToUTF8( OString(o3tl::trim(sLine.subView( nEqualSign + 1 ))) ); + + common::writePoEntry( + "Propex"_ostr, aPOStream, m_sSource, "property", + OString(sID), OString(), OString(), sText); + } + } + + aPOStream.close(); +} + +//Merge strings to source file +void PropParser::Merge( const OString &rMergeSrc, const OString &rDestinationFile ) +{ + assert( m_bIsInitialized ); + std::ofstream aDestination( + rDestinationFile.getStr(), std::ios_base::out | std::ios_base::trunc ); + if( !aDestination.is_open() ) { + std::cerr + << "Propex error: Cannot open source file for merge: " + << 
rDestinationFile << std::endl; + return; + } + + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( m_sLang != "qtz" ) + { + pMergeDataFile.reset( new MergeDataFile( rMergeSrc, m_sSource, false, false ) ); + + const std::vector<OString> vLanguages = pMergeDataFile->GetLanguages(); + if( !vLanguages.empty() && vLanguages[0] != m_sLang ) + { + std::cerr + << ("Propex error: given language conflicts with language of" + " Mergedata file: ") + << m_sLang << " - " + << vLanguages[0] << std::endl; + return; + } + } + + for( size_t nIndex = 0; nIndex < m_vLines.size(); ++nIndex ) + { + const OString sLine = m_vLines[nIndex]; + const sal_Int32 nEqualSign = sLine.indexOf('='); + if( !sLine.startsWith(" *") && !sLine.startsWith("/*") && + nEqualSign != -1 ) + { + const OString sID( o3tl::trim(sLine.subView( 0, sLine.indexOf('=') )) ); + ResData aResData( sID, m_sSource ); + aResData.sResTyp = "property"_ostr; + OString sNewText; + if( m_sLang == "qtz" ) + { + const OString sOriginText = lcl_ConvertToUTF8(OString(o3tl::trim(sLine.subView( nEqualSign + 1 )))); + sNewText = MergeEntrys::GetQTZText(aResData, sOriginText); + } + else if( pMergeDataFile ) + { + MergeEntrys* pEntrys = pMergeDataFile->GetMergeEntrys( &aResData ); + if( pEntrys ) + { + pEntrys->GetText( sNewText, m_sLang ); + } + } + if( !sNewText.isEmpty() ) + { + aDestination << OString(sID + "="); + lcl_PrintJavaStyle( sNewText, aDestination ); + aDestination << std::endl; + } + else + { + aDestination << sLine << std::endl; + } + } + else + { + aDestination << sLine << std::endl; + } + } + aDestination.close(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/src_yy_wrapper.c b/l10ntools/source/src_yy_wrapper.c new file mode 100644 index 0000000000..3fdcc392bf --- /dev/null +++ b/l10ntools/source/src_yy_wrapper.c @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +// Helper to suppress warnings in lex generated c code, see #i57362# +#include "src_yy.c" + +void (*avoid_unused_yyunput_in_src_yy_c)() = yyunput; +int (*avoid_unused_yy_flex_strlen_in_src_yy_c)() = yy_flex_strlen; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/treemerge.cxx b/l10ntools/source/treemerge.cxx new file mode 100644 index 0000000000..f6af927916 --- /dev/null +++ b/l10ntools/source/treemerge.cxx @@ -0,0 +1,285 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <iostream> +#include <cassert> +#include <cstring> + +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/xmlstring.h> + +#include <export.hxx> +#include <helper.hxx> +#include <common.hxx> +#include <po.hxx> +#include <treemerge.hxx> +#include <utility> + + +namespace +{ + // Extract strings from nodes on all level recursively + void lcl_ExtractLevel( + const xmlDocPtr pSource, const xmlNodePtr pRoot, + const xmlChar* pNodeName, PoOfstream& rPOStream ) + { + if( !pRoot->children ) + { + return; + } + for( xmlNodePtr pCurrent = pRoot->children->next; + pCurrent; pCurrent = pCurrent->next) + { + if (!xmlStrcmp(pCurrent->name, pNodeName)) + { + xmlChar* pID = xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("id")); + xmlChar* pText = + xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("title")); + + common::writePoEntry( + "Treex"_ostr, rPOStream, pSource->name, helper::xmlStrToOString( pNodeName ), + helper::xmlStrToOString( pID ), OString(), OString(), helper::xmlStrToOString( pText )); + + xmlFree( pID ); + xmlFree( pText ); + + lcl_ExtractLevel( + pSource, pCurrent, reinterpret_cast<const xmlChar *>("node"), + rPOStream ); + } + } + } + + // Update id and content of the topic + xmlNodePtr lcl_UpdateTopic( + const xmlNodePtr pCurrent, std::string_view rXhpRoot ) + { + xmlNodePtr pReturn = pCurrent; + xmlChar* pID = xmlGetProp(pReturn, reinterpret_cast<const xmlChar*>("id")); + const OString sID = + helper::xmlStrToOString( pID ); + xmlFree( pID ); + + const sal_Int32 nFirstSlash = sID.indexOf('/'); + // Update id attribute of topic + { + OString sNewID = + OString::Concat(sID.subView( 0, nFirstSlash + 1 )) + + rXhpRoot.substr( rXhpRoot.rfind('/') + 1 ) + + sID.subView( sID.indexOf( '/', nFirstSlash + 1 ) ); + xmlSetProp( + pReturn, reinterpret_cast<const xmlChar*>("id"), + reinterpret_cast<const xmlChar*>(sNewID.getStr())); + } + + const OString sXhpPath = + OString::Concat(rXhpRoot) + + sID.subView(sID.indexOf('/', 
nFirstSlash + 1)); + xmlDocPtr pXhpFile = xmlParseFile( sXhpPath.getStr() ); + // if xhpfile is missing than put this topic into comment + if ( !pXhpFile ) + { + xmlNodePtr pTemp = pReturn; + xmlChar* sNewID = + xmlGetProp(pReturn, reinterpret_cast<const xmlChar*>("id")); + xmlChar* sComment = + xmlStrcat( xmlCharStrdup("removed "), sNewID ); + pReturn = xmlNewComment( sComment ); + xmlReplaceNode( pTemp, pReturn ); + xmlFree( pTemp ); + xmlFree( sNewID ); + xmlFree( sComment ); + } + // update topic's content on the basis of xhpfile's title + else + { + xmlNodePtr pXhpNode = xmlDocGetRootElement( pXhpFile ); + for( pXhpNode = pXhpNode->children; + pXhpNode; pXhpNode = pXhpNode->children ) + { + while( pXhpNode->type != XML_ELEMENT_NODE ) + { + pXhpNode = pXhpNode->next; + } + if(!xmlStrcmp(pXhpNode->name, reinterpret_cast<const xmlChar *>("title"))) + { + xmlChar* sTitle = + xmlNodeListGetString(pXhpFile, pXhpNode->children, 1); + OString sNewTitle = + helper::xmlStrToOString( sTitle ). + replaceAll("$[officename]"_ostr,"%PRODUCTNAME"_ostr). 
+ replaceAll("$[officeversion]"_ostr,"%PRODUCTVERSION"_ostr); + xmlChar *xmlString = xmlEncodeSpecialChars(nullptr, + reinterpret_cast<const xmlChar*>( sNewTitle.getStr() )); + xmlNodeSetContent( pReturn, xmlString); + xmlFree( xmlString ); + xmlFree( sTitle ); + break; + } + } + if( !pXhpNode ) + { + std::cerr + << "Treex error: Cannot find title in " + << sXhpPath << std::endl; + pReturn = nullptr; + } + xmlFreeDoc( pXhpFile ); + xmlCleanupParser(); + } + return pReturn; + } + // Localize title attribute of help_section and node tags + void lcl_MergeLevel( + xmlDocPtr io_pSource, const xmlNodePtr pRoot, + const xmlChar * pNodeName, MergeDataFile* pMergeDataFile, + const OString& rLang, const OString& rXhpRoot ) + { + if( !pRoot->children ) + { + return; + } + for( xmlNodePtr pCurrent = pRoot->children; + pCurrent; pCurrent = pCurrent->next) + { + if( !xmlStrcmp(pCurrent->name, pNodeName) ) + { + if( rLang != "en-US" ) + { + OString sNewText; + xmlChar* pID = xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("id")); + ResData aResData( + helper::xmlStrToOString( pID ), + static_cast<OString>(io_pSource->name) ); + xmlFree( pID ); + aResData.sResTyp = helper::xmlStrToOString( pNodeName ); + if( pMergeDataFile ) + { + MergeEntrys* pEntrys = + pMergeDataFile->GetMergeEntrys( &aResData ); + if( pEntrys ) + { + pEntrys->GetText( sNewText, rLang ); + } + } + else if( rLang == "qtz" ) + { + xmlChar* pText = xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("title")); + const OString sOriginText = helper::xmlStrToOString(pText); + xmlFree( pText ); + sNewText = MergeEntrys::GetQTZText(aResData, sOriginText); + } + if( !sNewText.isEmpty() ) + { + xmlSetProp( + pCurrent, reinterpret_cast<const xmlChar*>("title"), + reinterpret_cast<const xmlChar*>(sNewText.getStr())); + } + } + + lcl_MergeLevel( + io_pSource, pCurrent, reinterpret_cast<const xmlChar *>("node"), + pMergeDataFile, rLang, rXhpRoot ); + } + else if( !xmlStrcmp(pCurrent->name, reinterpret_cast<const 
xmlChar *>("topic")) ) + { + pCurrent = lcl_UpdateTopic( pCurrent, rXhpRoot ); + } + } + } +} + +TreeParser::TreeParser( + const OString& rInputFile, OString _sLang ) + : m_pSource( nullptr ) + , m_sLang(std::move( _sLang )) + , m_bIsInitialized( false ) +{ + m_pSource = xmlParseFile( rInputFile.getStr() ); + if ( !m_pSource ) { + std::cerr + << "Treex error: Cannot open source file: " + << rInputFile << std::endl; + return; + } + if( !m_pSource->name ) + { + m_pSource->name = static_cast<char *>(xmlMalloc(strlen(rInputFile.getStr())+1)); + strcpy( m_pSource->name, rInputFile.getStr() ); + } + m_bIsInitialized = true; +} + +TreeParser::~TreeParser() +{ + // be sure m_pSource is freed + if (m_bIsInitialized) + xmlFreeDoc( m_pSource ); +} + +void TreeParser::Extract( const OString& rPOFile ) +{ + assert( m_bIsInitialized ); + PoOfstream aPOStream( rPOFile, PoOfstream::APP ); + if( !aPOStream.isOpen() ) + { + std::cerr + << "Treex error: Cannot open po file for extract: " + << rPOFile << std::endl; + return; + } + + xmlNodePtr pRootNode = xmlDocGetRootElement( m_pSource ); + lcl_ExtractLevel( + m_pSource, pRootNode, reinterpret_cast<const xmlChar *>("help_section"), + aPOStream ); + + xmlFreeDoc( m_pSource ); + xmlCleanupParser(); + aPOStream.close(); + m_bIsInitialized = false; +} + +void TreeParser::Merge( + const OString &rMergeSrc, const OString &rDestinationFile, + const OString &rXhpRoot ) +{ + assert( m_bIsInitialized ); + + const xmlNodePtr pRootNode = xmlDocGetRootElement( m_pSource ); + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( m_sLang != "qtz" && m_sLang != "en-US" ) + { + pMergeDataFile.reset(new MergeDataFile( + rMergeSrc, static_cast<OString>( m_pSource->name ), false, false )); + const std::vector<OString> vLanguages = pMergeDataFile->GetLanguages(); + if( !vLanguages.empty() && vLanguages[0] != m_sLang ) + { + std::cerr + << ("Treex error: given language conflicts with language of" + " Mergedata file: ") + << m_sLang << " - " + << 
vLanguages[0] << std::endl; + return; + } + } + lcl_MergeLevel( + m_pSource, pRootNode, reinterpret_cast<const xmlChar *>("help_section"), + pMergeDataFile.get(), m_sLang, rXhpRoot ); + + pMergeDataFile.reset(); + xmlSaveFile( rDestinationFile.getStr(), m_pSource ); + xmlFreeDoc( m_pSource ); + xmlCleanupParser(); + m_bIsInitialized = false; +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/treex.cxx b/l10ntools/source/treex.cxx new file mode 100644 index 0000000000..4e8fe0b264 --- /dev/null +++ b/l10ntools/source/treex.cxx @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <iostream> +#include <cstring> +#include <sal/main.h> + +#include <common.hxx> +#include <treemerge.hxx> + +static void WriteUsage() +{ + std::cout + << ("Syntax: Treex [-r Root] -i FileIn -o FileOut" + " [-m DataBase] [-l Lang]\n" + " Root: Path to root of localized xhp files\n" + " FileIn: Source files (*.tree)\n" + " FileOut: Destination file (*.*)\n" + " DataBase: Mergedata (*.po)\n" + " Lang: Restrict the handled languages; one element of\n" + " (de, en-US, ...) 
or all\n"); +} + + +// Entry point: strips the optional "-r Root" pair from the command line, +// lets common::handleArguments parse the rest and then merges or extracts. +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + OString sXHPRoot; + for (int nIndex = 1; nIndex != argc; ++nIndex) + { + if (std::strcmp(argv[nIndex], "-r") == 0) + { + // "-r" needs a value; argv[argc] is a null pointer + if( nIndex + 1 >= argc ) + { + WriteUsage(); + return 1; + } + sXHPRoot = OString( argv[nIndex + 1] ); + // Close the gap left by "-r Root" so that handleArguments + // only sees the options it knows about. + for( int nIndex2 = nIndex + 2; nIndex2 < argc; ++nIndex2 ) + { + argv[nIndex2 - 2] = argv[nIndex2]; + } + argc = argc - 2; + break; + } + } + common::HandledArgs aArgs; + if( !common::handleArguments(argc, argv, aArgs) ) + { + WriteUsage(); + return 1; + } + + TreeParser aParser(aArgs.m_sInputFile, aArgs.m_sLanguage ); + if( !aParser.isInitialized() ) + { + return 1; + } + + if( aArgs.m_bMergeMode || !sXHPRoot.isEmpty() ) + { + aParser.Merge( aArgs.m_sMergeSrc, aArgs.m_sOutputFile, sXHPRoot ); + } + else + { + aParser.Extract( aArgs.m_sOutputFile ); + } + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/ulfconv/msi-encodinglist.txt b/l10ntools/source/ulfconv/msi-encodinglist.txt new file mode 100644 index 0000000000..eaa1754cf5 --- /dev/null +++ b/l10ntools/source/ulfconv/msi-encodinglist.txt @@ -0,0 +1,180 @@ +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+# + +# Syntax: language 0 LCID +# 2nd field used to be the ANSI codepage, +# but now we are using UTF-8 everywhere. +# comment lines begin with hash +af 0 1078 # Afrikaans +am 0 1118 # Amharic +ar 0 1025 +ar-SA 0 1025 +as 0 1101 # Assamese +as-IN 0 1101 # Assamese +ast 0 1610 +be 0 1059 # Belarusian +be-BY 0 1059 +bg 0 1026 # Bulgarian +bn 0 2117 # Bengali +bn-BD 0 2117 # Bengali Bangladesh +bn-IN 0 1093 # Bengali India +bo 0 2121 +br 0 1150 # Breton +brx 0 1603 # Bodo (India) +bs 0 5146 # bosnian +ca 0 1027 # Catalan +ca-valencia 0 2051 # Catalan Valencian +ckb 0 1170 # Central Kurdish (Sorani) +cs 0 1029 # Czech +cy 0 1106 # Welsh +da 0 1030 +de 0 1031 +dgo 0 1604 # Dogri (India) +dsb 0 2094 # Lower Sorbian +dz 0 3153 # Dzongkha +el 0 1032 +en-GB 0 2057 +en-US 0 1033 +en-ZA 0 7177 +eo 0 1553 # Esperanto +es 0 1034 +et 0 1061 +eu 0 1069 # Basque +fa 0 1065 # Farsi +fi 0 1035 +fo 0 1080 # Faroese +fr 0 1036 +fr-CA 0 3084 +fy 0 1122 # Frisian +fur 0 1585 +ga 0 2108 # Irish +gd 0 1169 # Gaelic (Scotland) +gl 0 1110 # Galician +gu 0 1095 # Gujarati +gu-IN 0 1095 # Gujarati +gug 0 1140 # Guarani - Paraguay +he 0 1037 +hi 0 1081 +hr 0 1050 # Croatian +ht 0 1626 # Haitian +hu 0 1038 +hsb 0 1070 # Upper Sorbian +hy 0 1067 # Armenian +id 0 1057 # Indonesian +is 0 1039 # Icelandic +it 0 1040 +ja 0 1041 +jbo 0 1624 +ka 0 1079 # Georgian +kab 0 1625 # Kabyle +kk 0 1087 +km 0 1107 # Khmer +kmr-Latn 0 1574 +kn 0 1099 # Kannada +kn-IN 0 1099 # Kannada +ko 0 1042 +kok 0 1111 # Konkani +ks 0 1120 # Kashmiri +ky 0 1088 # Kyrgyz +ky-CN 0 1640 # Kyrgyz (China) +lb 0 1134 +lo 0 1108 # Lao +lt 0 1063 # Lithuanian +lv 0 1062 # Latvian +mai 0 1605 # Maithili (India) +mk 0 1071 # Macedonian +ml 0 1100 +ml-IN 0 1100 +mn 0 1104 # Mongolian +mni 0 1112 # Manipuri +mn-TR 0 2128 # Mongolian Classical/traditional +mr 0 1102 # Marathi +mr-IN 0 1102 +ms 0 1086 # Malay (Malaysian) +mt 0 1082 # Maltese +my 0 1109 # Burmese +nb 0 1044 +ne 0 1121 # Nepali +nl 0 1043 +nn 0 2068 +no 0 1044 +nr 0 1580 
# Ndebele South +nso 0 1132 +ny 0 1598 +oc 0 1154 # Occitan-lengadocian +om 0 1138 # Oromo +or 0 1096 # Odia +or-IN 0 1096 +pa-IN 0 1094 # Punjabi +pap 0 2171 +pl 0 1045 +ps 0 2171 +pt 0 2070 +pt-BR 0 1046 +pt-PT 0 2070 +qtz 0 1638 # key id pseudo language +rm 0 1047 # Raeto-Romance +ro 0 1048 # Romanian +ru 0 1049 +rw 0 1159 # Kinyarwanda +sa-IN 0 1103 # Sanskrit +sat 0 1606 # Santali +sb 0 1070 # Sorbian +sc 0 3047 +sd 0 1113 # Sindhi +si 0 1115 # Sinhala +sid 0 1669 # Sidama, fake LCID +sk 0 1051 # Slovak +sl 0 1060 # Slovenian +sq 0 1052 # Albanian +sr 0 3098 # Serbian Cyrillic +sr-Latn 0 2074 # Serbian Latin +sr-SP 0 3098 # Serbian Cyrillic +ss 0 1579 # Swazi +st 0 1072 # Southern Sotho, Sutu +sv 0 1053 +sw 0 1089 # Swahili +sw-TZ 0 1089 # Swahili +szl 0 1689 # Silesian +so 0 1143 +ta 0 1097 # Tamil +ta-IN 0 1097 # Tamil +te 0 1098 +te-IN 0 1098 +tg 0 1064 # Tajik +th 0 1054 +ti 0 1139 # Tigrinya +ti-ER 0 1139 # Tigrinya +tn 0 1074 # Setsuana +tr 0 1055 # Turkish +ts 0 1073 # Tsonga +tk 0 1090 +tt 0 1092 # Tatar +ug 0 1152 +uk 0 1058 # Ukrainian +ur 0 1056 # Urdu +ur-IN 0 2080 +uz 0 1091 # Uzbek (Latin) +ve 0 1075 # Venda +vec 0 1685 # Venetian +vi 0 1066 # Vietnamese +xh 0 1076 # Xhosa +yi 0 1085 # Yiddish +zh-CN 0 2052 +zh-TW 0 1028 +zu 0 1077 # Zulu diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx new file mode 100644 index 0000000000..397e071a30 --- /dev/null +++ b/l10ntools/source/xmlparse.cxx @@ -0,0 +1,1108 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#include <sal/config.h> + +#include <cassert> +#include <stdio.h> +#include <string_view> + +#include <helper.hxx> +#include <utility> +#include <xmlparse.hxx> +#include <fstream> +#include <iostream> +#include <osl/file.hxx> +#include <osl/process.h> +#include <o3tl/string_view.hxx> +#include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> +#include <unicode/regex.h> + +using namespace osl; + +constexpr OString XML_LANG = "xml-lang"_ostr; + + + + +XMLChildNode::XMLChildNode( XMLParentNode *pPar ) + : m_pParent( pPar ) +{ + if ( m_pParent ) + m_pParent->AddChild( this ); +} + + +XMLChildNode::XMLChildNode( const XMLChildNode& rObj) + : XMLNode(rObj), + m_pParent(rObj.m_pParent) +{ +} + +XMLChildNode& XMLChildNode::operator=(const XMLChildNode& rObj) +{ + if(this != &rObj) + { + m_pParent=rObj.m_pParent; + } + return *this; +} + + + + +XMLParentNode::~XMLParentNode() +{ + if( m_pChildList ) + { + RemoveAndDeleteAllChildren(); + } +} + +XMLParentNode::XMLParentNode( const XMLParentNode& rObj) +: XMLChildNode( rObj ) +{ + if( !rObj.m_pChildList ) + return; + + m_pChildList.reset( new XMLChildNodeList ); + for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ ) + { + XMLChildNode* pNode = (*rObj.m_pChildList)[ i ]; + if( pNode != nullptr) + { + switch(pNode->GetNodeType()) + { + case XMLNodeType::ELEMENT: + AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break; + case XMLNodeType::DATA: + AddChild( new 
XMLData ( *static_cast<XMLData* > (pNode) ) ); break; + case XMLNodeType::COMMENT: + AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break; + case XMLNodeType::DEFAULT: + AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break; + default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj"); + } + } + } +} + +XMLParentNode& XMLParentNode::operator=(const XMLParentNode& rObj) +{ + if(this!=&rObj) + { + XMLChildNode::operator=(rObj); + if( m_pChildList ) + { + RemoveAndDeleteAllChildren(); + } + if( rObj.m_pChildList ) + { + m_pChildList.reset( new XMLChildNodeList ); + for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ ) + AddChild( (*rObj.m_pChildList)[ i ] ); + } + else + m_pChildList.reset(); + + } + return *this; +} +void XMLParentNode::AddChild( XMLChildNode *pChild ) +{ + if ( !m_pChildList ) + m_pChildList.reset( new XMLChildNodeList ); + m_pChildList->push_back( pChild ); +} + +void XMLParentNode::RemoveAndDeleteAllChildren() +{ + if ( m_pChildList ) + { + for ( size_t i = 0; i < m_pChildList->size(); i++ ) + delete (*m_pChildList)[ i ]; + m_pChildList->clear(); + } +} + + + + +void XMLFile::Write( OString const &aFilename ) +{ + std::ofstream s( + aFilename.getStr(), std::ios_base::out | std::ios_base::trunc); + if (!s.is_open()) + { + std::cerr + << "Error: helpex cannot create file " << aFilename + << '\n'; + std::exit(EXIT_FAILURE); + } + Write(s); + s.close(); +} + +void XMLFile::Write( std::ofstream &rStream , XMLNode *pCur ) +{ + if ( !pCur ) + Write( rStream, this ); + else { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + Write( rStream, (*GetChildList())[ i ] ); + } + break; + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + rStream << "<"; + rStream << pElement->GetName(); + if ( pElement->GetAttributeList()) + for ( size_t j = 0; j < 
pElement->GetAttributeList()->size(); j++ ) + { + rStream << " "; + OString sData( (*pElement->GetAttributeList())[ j ]->GetName() ); + rStream << XMLUtil::QuotHTML( sData ); + rStream << "=\""; + sData = (*pElement->GetAttributeList())[ j ]->GetValue(); + rStream << XMLUtil::QuotHTML( sData ); + rStream << "\""; + } + if ( !pElement->GetChildList()) + rStream << "/>"; + else + { + rStream << ">"; + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + Write( rStream, (*pElement->GetChildList())[ k ] ); + rStream << "</"; + rStream << pElement->GetName(); + rStream << ">"; + } + } + break; + case XMLNodeType::DATA: + { + OString sData( static_cast<const XMLData*>(pCur)->GetData()); + rStream << XMLUtil::QuotHTML( sData ); + } + break; + case XMLNodeType::COMMENT: + { + const XMLComment *pComment = static_cast<const XMLComment*>(pCur); + rStream << "<!--"; + rStream << pComment->GetComment(); + rStream << "-->"; + } + break; + case XMLNodeType::DEFAULT: + { + const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur); + rStream << pDefault->GetDefault(); + } + break; + } + } +} + +void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel ) +{ + if ( !pCur ) + Print( this ); + else + { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + Print( (*GetChildList())[ i ] ); + } + break; + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + + fprintf( stdout, "<%s", pElement->GetName().getStr()); + if ( pElement->GetAttributeList()) + { + for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j) + { + const OString aAttrName((*pElement->GetAttributeList())[j]->GetName()); + if (aAttrName != XML_LANG) + { + fprintf( stdout, " %s=\"%s\"", + aAttrName.getStr(), + (*pElement->GetAttributeList())[ j ]->GetValue().getStr()); + } + } + } + if ( !pElement->GetChildList()) + fprintf( stdout, "/>" ); + else + { + fprintf( stdout, 
">" ); + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + Print( (*pElement->GetChildList())[ k ], nLevel + 1 ); + fprintf( stdout, "</%s>", pElement->GetName().getStr()); + } + } + break; + case XMLNodeType::DATA: + { + const XMLData *pData = static_cast<const XMLData*>(pCur); + fprintf( stdout, "%s", pData->GetData().getStr()); + } + break; + case XMLNodeType::COMMENT: + { + const XMLComment *pComment = static_cast<const XMLComment*>(pCur); + fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr()); + } + break; + case XMLNodeType::DEFAULT: + { + const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur); + fprintf( stdout, "%s", pDefault->GetDefault().getStr()); + } + break; + } + } +} +XMLFile::~XMLFile() +{ + if( m_pXMLStrings ) + { + for (auto const& pos : *m_pXMLStrings) + { + delete pos.second; // Check and delete content also ? + } + } +} + +XMLFile::XMLFile( OString _sFileName ) // the file name, empty if created from memory stream + : XMLParentNode( nullptr ) + , m_sFileName(std::move( _sFileName )) +{ + m_aNodes_localize.emplace( "bookmark"_ostr , true ); + m_aNodes_localize.emplace( "variable"_ostr , true ); + m_aNodes_localize.emplace( "paragraph"_ostr , true ); + m_aNodes_localize.emplace( "h1"_ostr , true ); + m_aNodes_localize.emplace( "h2"_ostr , true ); + m_aNodes_localize.emplace( "h3"_ostr , true ); + m_aNodes_localize.emplace( "h4"_ostr , true ); + m_aNodes_localize.emplace( "h5"_ostr , true ); + m_aNodes_localize.emplace( "h6"_ostr , true ); + m_aNodes_localize.emplace( "note"_ostr , true ); + m_aNodes_localize.emplace( "tip"_ostr , true ); + m_aNodes_localize.emplace( "warning"_ostr , true ); + m_aNodes_localize.emplace( "alt"_ostr , true ); + m_aNodes_localize.emplace( "caption"_ostr , true ); + m_aNodes_localize.emplace( "title"_ostr , true ); + m_aNodes_localize.emplace( "link"_ostr , true ); +} + +void XMLFile::Extract() +{ + m_pXMLStrings.reset( new XMLHashMap ); + SearchL10NElements( this ); +} + +void 
XMLFile::InsertL10NElement( XMLElement* pElement ) +{ + OString sId, sLanguage("en-US"_ostr); + LangHashMap* pElem; + + if( pElement->GetAttributeList() != nullptr ) + { + for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) + { + const OString sTempStr((*pElement->GetAttributeList())[ j ]->GetName()); + // Get the "id" Attribute + if (sTempStr == "id") + { + sId = (*pElement->GetAttributeList())[ j ]->GetValue(); + } + // Get the "xml-lang" Attribute + if (sTempStr == XML_LANG) + { + sLanguage = (*pElement->GetAttributeList())[j]->GetValue(); + } + + } + } + else + { + fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found"); + fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++"); + Print( pElement ); + fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++"); + } + + XMLHashMap::iterator pos = m_pXMLStrings->find( sId ); + if( pos == m_pXMLStrings->end() ) // No instance, create new one + { + pElem = new LangHashMap; + (*pElem)[ sLanguage ]=pElement; + m_pXMLStrings->emplace( sId , pElem ); + m_vOrder.push_back( sId ); + } + else // Already there + { + pElem=pos->second; + if ( pElem->count(sLanguage) ) + { + fprintf(stdout,"Error: Duplicated entry. 
ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() ); + exit( -1 ); + } + (*pElem)[ sLanguage ]=pElement; + } +} + +XMLFile::XMLFile( const XMLFile& rObj ) + : XMLParentNode( rObj ) + , m_sFileName( rObj.m_sFileName ) +{ + if( this != &rObj ) + { + m_aNodes_localize = rObj.m_aNodes_localize; + m_vOrder = rObj.m_vOrder; + } +} + +XMLFile& XMLFile::operator=(const XMLFile& rObj) +{ + if( this == &rObj ) + return *this; + + XMLParentNode::operator=(rObj); + + m_aNodes_localize = rObj.m_aNodes_localize; + m_vOrder = rObj.m_vOrder; + + m_pXMLStrings.reset(); + + if( rObj.m_pXMLStrings ) + { + m_pXMLStrings.reset( new XMLHashMap ); + for (auto const& pos : *rObj.m_pXMLStrings) + { + LangHashMap* pElem=pos.second; + LangHashMap* pNewelem = new LangHashMap; + for (auto const& pos2 : *pElem) + { + (*pNewelem)[ pos2.first ] = new XMLElement( *pos2.second ); + } + (*m_pXMLStrings)[ pos.first ] = pNewelem; + } + } + return *this; +} + +void XMLFile::SearchL10NElements( XMLChildNode *pCur ) +{ + if ( !pCur ) + SearchL10NElements( this ); + else + { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + { + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + { + XMLChildNode* pElement = (*GetChildList())[ i ]; + if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) + SearchL10NElements( pElement ); + } + } + } + break; + case XMLNodeType::ELEMENT: + { + bool bInsert = true; + XMLElement *pElement = static_cast<XMLElement*>(pCur); + const OString sName(pElement->GetName().toAsciiLowerCase()); + if ( pElement->GetAttributeList()) + { + for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j ) + { + if ((*pElement->GetAttributeList())[j]->GetName() == "localize") + { + bInsert=false; + break; + } + } + } + + if ( bInsert && ( m_aNodes_localize.find( sName ) != m_aNodes_localize.end() ) ) + InsertL10NElement(pElement); + else if ( bInsert && pElement->GetChildList() ) + { + 
for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + SearchL10NElements( (*pElement->GetChildList())[ k ] ); + } + } + break; + default: + break; + } + } +} + +bool XMLFile::CheckExportStatus( XMLChildNode *pCur ) +{ + static bool bStatusExport = true; + + if ( !pCur ) + CheckExportStatus( this ); + else { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + { + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + { + XMLChildNode* pElement = (*GetChildList())[ i ]; + if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) CheckExportStatus( pElement );//, i); + } + } + } + break; + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC")) + { + if ( pElement->GetAttributeList()) + { + for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt; ++j) + { + const OString tmpStr((*pElement->GetAttributeList())[j]->GetName()); + if (tmpStr.equalsIgnoreAsciiCase("STATUS")) + { + const OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue()); + if (!tmpStrVal.equalsIgnoreAsciiCase("PUBLISH") && + !tmpStrVal.equalsIgnoreAsciiCase("DEPRECATED")) + { + bStatusExport = false; + } + } + + } + } + } + else if ( pElement->GetChildList() ) + { + for (size_t k = 0; k < pElement->GetChildList()->size(); ++k) + CheckExportStatus( (*pElement->GetChildList())[k] ); + } + } + break; + default: + break; + } + } + return bStatusExport; +} + +XMLElement::XMLElement( + OString _sName, // the element name + XMLParentNode *pParent // parent node of this element +) + : XMLParentNode( pParent ) + , m_sElementName(std::move( _sName )) +{ +} + +XMLElement::XMLElement(const XMLElement& rObj) + : XMLParentNode( rObj ) + , m_sElementName( rObj.m_sElementName ) +{ + if ( rObj.m_pAttributes ) + { + m_pAttributes.reset( new XMLAttributeList ); + for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ ) + AddAttribute( (*rObj.m_pAttributes)[ i 
]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() ); + } +} + +XMLElement& XMLElement::operator=(const XMLElement& rObj) +{ + if( this !=& rObj ) + { + XMLParentNode::operator=(rObj); + m_sElementName = rObj.m_sElementName; + + if ( m_pAttributes ) + { + for ( size_t i = 0; i < m_pAttributes->size(); i++ ) + delete (*m_pAttributes)[ i ]; + m_pAttributes.reset(); + } + if ( rObj.m_pAttributes ) + { + m_pAttributes.reset( new XMLAttributeList ); + for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ ) + AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() ); + } + } + return *this; +} + +void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue ) +{ + if ( !m_pAttributes ) + m_pAttributes.reset( new XMLAttributeList ); + m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) ); +} + +void XMLElement::ChangeLanguageTag( const OString &rValue ) +{ + if ( m_pAttributes ) + { + bool bWasSet = false; + for (size_t i = 0; i < m_pAttributes->size(); ++i) + { + if ((*m_pAttributes)[ i ]->GetName() == XML_LANG) + { + (*m_pAttributes)[ i ]->setValue(rValue); + bWasSet = true; + } + } + + if (!bWasSet) + AddAttribute(XML_LANG, rValue); + } + XMLChildNodeList* pCList = GetChildList(); + + if( !pCList ) + return; + + for ( size_t i = 0; i < pCList->size(); i++ ) + { + XMLChildNode* pNode = (*pCList)[ i ]; + if( pNode && pNode->GetNodeType() == XMLNodeType::ELEMENT ) + { + XMLElement* pElem = static_cast< XMLElement* >(pNode); + pElem->ChangeLanguageTag( rValue ); + pElem = nullptr; + pNode = nullptr; + } + } + pCList = nullptr; +} + +XMLElement::~XMLElement() +{ + if ( m_pAttributes ) + { + for ( size_t i = 0; i < m_pAttributes->size(); i++ ) + delete (*m_pAttributes)[ i ]; + } +} + +OString XMLElement::ToOString() +{ + OStringBuffer sBuffer; + Print(this,sBuffer,true); + return sBuffer.makeStringAndClear(); +} + +void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const +{ 
+ if( pCur ) + { + if( bRootelement ) + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + if ( pElement->GetAttributeList()) + { + if ( pElement->GetChildList()) + { + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + { + XMLChildNode* pTmp = (*pElement->GetChildList())[ k ]; + Print( pTmp, rBuffer , false); + } + } + } + } + else + { + switch( pCur->GetNodeType()) + { + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + + if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ) + { + rBuffer.append( "<" ); + rBuffer.append( pElement->GetName() ); + if ( pElement->GetAttributeList()) + { + for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) + { + const OString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() ); + if (aAttrName != XML_LANG) + { + rBuffer.append( + " " + aAttrName + "=\"" + + (*pElement->GetAttributeList())[ j ]->GetValue() + "\"" ); + } + } + } + if ( !pElement->GetChildList()) + rBuffer.append( "/>" ); + else + { + rBuffer.append( ">" ); + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + { + XMLChildNode* pTmp = (*pElement->GetChildList())[ k ]; + Print( pTmp, rBuffer , false); + } + rBuffer.append( "</" + pElement->GetName() + ">" ); + } + } + } + break; + case XMLNodeType::DATA: + { + const XMLData *pData = static_cast<const XMLData*>(pCur); + rBuffer.append( pData->GetData() ); + } + break; + case XMLNodeType::COMMENT: + { + const XMLComment *pComment = static_cast<const XMLComment*>(pCur); + rBuffer.append( "<!--" + pComment->GetComment() + "-->" ); + } + break; + case XMLNodeType::DEFAULT: + { + const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur); + rBuffer.append( pDefault->GetDefault() ); + } + break; + default: + break; + } + } + } + else + { + fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n"); + return; + } +} + + + + +namespace +{ + +OUString lcl_pathnameToAbsoluteUrl(std::string_view 
rPathname) +{ + OUString sPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 ); + OUString sUrl; + if (osl::FileBase::getFileURLFromSystemPath(sPath, sUrl) + != osl::FileBase::E_None) + { + std::cerr << "Error: Cannot convert input pathname to URL\n"; + std::exit(EXIT_FAILURE); + } + OUString sCwd; + if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None) + { + std::cerr << "Error: Cannot determine cwd\n"; + std::exit(EXIT_FAILURE); + } + if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl) + != osl::FileBase::E_None) + { + std::cerr << "Error: Cannot convert input URL to absolute URL\n"; + std::exit(EXIT_FAILURE); + } + return sUrl; +} +} + + +SimpleXMLParser::SimpleXMLParser() + : m_pCurNode(nullptr) + , m_pCurData(nullptr) +{ + m_aParser = XML_ParserCreate( nullptr ); + XML_SetUserData( m_aParser, this ); + XML_SetElementHandler( m_aParser, StartElementHandler, EndElementHandler ); + XML_SetCharacterDataHandler( m_aParser, CharacterDataHandler ); + XML_SetCommentHandler( m_aParser, CommentHandler ); + XML_SetDefaultHandler( m_aParser, DefaultHandler ); +} + +SimpleXMLParser::~SimpleXMLParser() +{ + XML_ParserFree( m_aParser ); +} + +void SimpleXMLParser::StartElementHandler( + void *userData, const XML_Char *name, const XML_Char **atts ) +{ + static_cast<SimpleXMLParser *>(userData)->StartElement( name, atts ); +} + +void SimpleXMLParser::EndElementHandler( + void *userData, const XML_Char * /*name*/ ) +{ + static_cast<SimpleXMLParser *>(userData)->EndElement(); +} + +void SimpleXMLParser::CharacterDataHandler( + void *userData, const XML_Char *s, int len ) +{ + static_cast<SimpleXMLParser *>(userData)->CharacterData( s, len ); +} + +void SimpleXMLParser::CommentHandler( + void *userData, const XML_Char *data ) +{ + static_cast<SimpleXMLParser *>(userData)->Comment( data ); +} + +void SimpleXMLParser::DefaultHandler( + void *userData, const XML_Char *s, int len ) +{ + static_cast<SimpleXMLParser *>(userData)->Default( s, len ); +} + +void 
SimpleXMLParser::StartElement( + const XML_Char *name, const XML_Char **atts ) +{ + XMLElement *pElement = new XMLElement( OString(name), m_pCurNode ); + m_pCurNode = pElement; + m_pCurData = nullptr; + + int i = 0; + while( atts[i] ) + { + pElement->AddAttribute( atts[ i ], atts[ i + 1 ] ); + i += 2; + } +} + +void SimpleXMLParser::EndElement() +{ + m_pCurNode = m_pCurNode->GetParent(); + m_pCurData = nullptr; +} + +void SimpleXMLParser::CharacterData( const XML_Char *s, int len ) +{ + if ( !m_pCurData ) + { + OString x( s, len ); + m_pCurData = new XMLData( helper::UnQuotHTML(x) , m_pCurNode ); + } + else + { + OString x( s, len ); + m_pCurData->AddData( helper::UnQuotHTML(x) ); + + } +} + +void SimpleXMLParser::Comment( const XML_Char *data ) +{ + m_pCurData = nullptr; + new XMLComment( OString( data ), m_pCurNode ); +} + +void SimpleXMLParser::Default( const XML_Char *s, int len ) +{ + m_pCurData = nullptr; + new XMLDefault(OString( s, len ), m_pCurNode ); +} + +bool SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFile ) +{ + m_aErrorInformation.m_eCode = XML_ERROR_NONE; + m_aErrorInformation.m_nLine = 0; + m_aErrorInformation.m_nColumn = 0; + m_aErrorInformation.m_sMessage = "ERROR: Unable to open file "_ostr; + m_aErrorInformation.m_sMessage += rFileName; + + OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName)); + + oslFileHandle h; + if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read) + != osl_File_E_None) + { + return false; + } + + sal_uInt64 s; + oslFileError e = osl_getFileSize(h, &s); + void * p = nullptr; + if (e == osl_File_E_None) + { + e = osl_mapFile(h, &p, s, 0, 0); + } + if (e != osl_File_E_None) + { + osl_closeFile(h); + return false; + } + + pXMLFile->SetName( rFileName ); + + m_pCurNode = pXMLFile; + m_pCurData = nullptr; + + m_aErrorInformation.m_eCode = XML_ERROR_NONE; + m_aErrorInformation.m_nLine = 0; + m_aErrorInformation.m_nColumn = 0; + if ( !pXMLFile->GetName().isEmpty()) + { + 
m_aErrorInformation.m_sMessage = "File " + pXMLFile->GetName() + " parsed successfully"; + } + else + m_aErrorInformation.m_sMessage = "XML-File parsed successfully"_ostr; + + bool result = XML_Parse(m_aParser, static_cast< char * >(p), s, true); + if (!result) + { + m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser ); + m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser ); + m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser ); + + m_aErrorInformation.m_sMessage = "ERROR: "_ostr; + if ( !pXMLFile->GetName().isEmpty()) + m_aErrorInformation.m_sMessage += pXMLFile->GetName(); + else + m_aErrorInformation.m_sMessage += "XML-File ("; + + m_aErrorInformation.m_sMessage += + OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," + + OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): "; + + switch (m_aErrorInformation.m_eCode) + { + case XML_ERROR_NO_MEMORY: + m_aErrorInformation.m_sMessage += "No memory"; + break; + case XML_ERROR_SYNTAX: + m_aErrorInformation.m_sMessage += "Syntax"; + break; + case XML_ERROR_NO_ELEMENTS: + m_aErrorInformation.m_sMessage += "No elements"; + break; + case XML_ERROR_INVALID_TOKEN: + m_aErrorInformation.m_sMessage += "Invalid token"; + break; + case XML_ERROR_UNCLOSED_TOKEN: + m_aErrorInformation.m_sMessage += "Unclosed token"; + break; + case XML_ERROR_PARTIAL_CHAR: + m_aErrorInformation.m_sMessage += "Partial char"; + break; + case XML_ERROR_TAG_MISMATCH: + m_aErrorInformation.m_sMessage += "Tag mismatch"; + break; + case XML_ERROR_DUPLICATE_ATTRIBUTE: + m_aErrorInformation.m_sMessage += "Duplicated attribute"; + break; + case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: + m_aErrorInformation.m_sMessage += "Junk after doc element"; + break; + case XML_ERROR_PARAM_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Param entity ref"; + break; + case XML_ERROR_UNDEFINED_ENTITY: + m_aErrorInformation.m_sMessage += "Undefined entity"; + break; + case 
XML_ERROR_RECURSIVE_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Recursive entity ref"; + break; + case XML_ERROR_ASYNC_ENTITY: + m_aErrorInformation.m_sMessage += "Async_entity"; + break; + case XML_ERROR_BAD_CHAR_REF: + m_aErrorInformation.m_sMessage += "Bad char ref"; + break; + case XML_ERROR_BINARY_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Binary entity"; + break; + case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Attribute external entity ref"; + break; + case XML_ERROR_MISPLACED_XML_PI: + m_aErrorInformation.m_sMessage += "Misplaced xml pi"; + break; + case XML_ERROR_UNKNOWN_ENCODING: + m_aErrorInformation.m_sMessage += "Unknown encoding"; + break; + case XML_ERROR_INCORRECT_ENCODING: + m_aErrorInformation.m_sMessage += "Incorrect encoding"; + break; + case XML_ERROR_UNCLOSED_CDATA_SECTION: + m_aErrorInformation.m_sMessage += "Unclosed cdata section"; + break; + case XML_ERROR_EXTERNAL_ENTITY_HANDLING: + m_aErrorInformation.m_sMessage += "External entity handling"; + break; + case XML_ERROR_NOT_STANDALONE: + m_aErrorInformation.m_sMessage += "Not standalone"; + break; + case XML_ERROR_NONE: + break; + default: + break; + } + } + + osl_unmapMappedFile(h, p, s); + osl_closeFile(h); + + return result; +} + +namespace +{ + +icu::UnicodeString lcl_QuotRange( + const icu::UnicodeString& rString, const sal_Int32 nStart, + const sal_Int32 nEnd, bool bInsideTag = false ) +{ + icu::UnicodeString sReturn; + assert( nStart < nEnd ); + assert( nStart >= 0 ); + assert( nEnd <= rString.length() ); + for (sal_Int32 i = nStart; i < nEnd; ++i) + { + switch (rString[i]) + { + case '<': + sReturn.append("<"); + break; + case '>': + sReturn.append(">"); + break; + case '"': + if( !bInsideTag ) + sReturn.append("""); + else + sReturn.append(rString[i]); + break; + case '&': + if (rString.startsWith("&", i, 5)) + sReturn.append('&'); + else + sReturn.append("&"); + break; + default: + sReturn.append(rString[i]); + break; + } + } + 
return sReturn; +} + +bool lcl_isTag( const icu::UnicodeString& rString ) +{ + static const int nSize = 20; + static const icu::UnicodeString vTags[nSize] = { + "ahelp", "link", "item", "emph", "defaultinline", + "switchinline", "caseinline", "variable", + "bookmark_value", "image", "object", + "embedvar", "alt", "sup", "sub", + "menuitem", "keycode", "input", "literal", "widget" + }; + + for( int nIndex = 0; nIndex < nSize; ++nIndex ) + { + if( rString.startsWith("<" + vTags[nIndex]) || + rString == "</" + vTags[nIndex] + ">" ) + return true; + } + + return rString == "<br/>" || rString =="<help-id-missing/>"; +} + +} /// anonymous namespace + +OString XMLUtil::QuotHTML( const OString &rString ) +{ + if( o3tl::trim(rString).empty() ) + return rString; + UErrorCode nIcuErr = U_ZERO_ERROR; + static const sal_uInt32 nSearchFlags = + UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE; + static const icu::UnicodeString sSearchPat( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" ); + + const OUString sOUSource = OStringToOUString(rString, RTL_TEXTENCODING_UTF8); + icu::UnicodeString sSource( + reinterpret_cast<const UChar*>( + sOUSource.getStr()), sOUSource.getLength() ); + + icu::RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr ); + aRegexMatcher.reset( sSource ); + + icu::UnicodeString sReturn; + int32_t nEndPos = 0; + int32_t nStartPos = 0; + while( aRegexMatcher.find(nStartPos, nIcuErr) && U_SUCCESS(nIcuErr) ) + { + nStartPos = aRegexMatcher.start(nIcuErr); + if ( nEndPos < nStartPos ) + sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos)); + nEndPos = aRegexMatcher.end(nIcuErr); + icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr); + if( lcl_isTag(sMatch) ) + { + sReturn.append("<"); + sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true)); + sReturn.append(">"); + } + else + sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos)); + nStartPos = nEndPos; + } + if( nEndPos < sSource.length() ) + 
sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length())); + sReturn.append('\0'); + return + OUStringToOString( + reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()), + RTL_TEXTENCODING_UTF8); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/xrm_yy_wrapper.c b/l10ntools/source/xrm_yy_wrapper.c new file mode 100644 index 0000000000..36f902e0fd --- /dev/null +++ b/l10ntools/source/xrm_yy_wrapper.c @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +// Helper to suppress warnings in lex generated c code, see #i57362# +#include "xrm_yy.c" + +void (*avoid_unused_yyunput_in_xrm_yy_c)() = yyunput; +int (*avoid_unused_yy_flex_strlen_in_xrm_yy_c)() = yy_flex_strlen; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/xrmlex.l b/l10ntools/source/xrmlex.l new file mode 100644 index 0000000000..0644a5bc28 --- /dev/null +++ b/l10ntools/source/xrmlex.l @@ -0,0 +1,218 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +%{ + +/* + * lexer for parsing xml-property source files (*.xml) + */ + +#include <sal/config.h> + +/* enlarge token buffer to tokenize whole strings */ +#undef YYLMAX +#define YYLMAX 64000 + +/* to enable debug output define LEXDEBUG */ +#define LEXDEBUG 1 +#ifdef LEXDEBUG +#define OUTPUT fprintf +#else +#define OUTPUT(Par1,Par2); +#endif + +/* table of possible token ids */ +#include <tokens.h> +#include <xrmlex.hxx> +#include <stdlib.h> +#include <stdio.h> + +#include <sal/main.h> + +#define YY_NO_UNISTD_H + +static int bText=0; +%} + +%option yylineno +%option nounput +%option never-interactive + +%p 24000 +%e 1200 +%n 500 + +%% + +"<p "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</p>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} + +"<h1 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h1>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h2 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h2>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h3 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h3>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h4 "[^\>]*xml:lang[^\>]*\> { + 
WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h4>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h5 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h5>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} + +"<display-name>" { + WorkOnTokenSet( DESC_DISPLAY_NAME_START , yytext ); +} + +"</display-name>" { + WorkOnTokenSet( DESC_DISPLAY_NAME_END, yytext ); +} + +"<name "[^\>]*lang[^\>]*\> { + WorkOnTokenSet( DESC_TEXT_START , yytext ); +} + +"</name>" { + WorkOnTokenSet( DESC_TEXT_END, yytext ); +} + +"<extension-description>" { + WorkOnTokenSet( DESC_EXTENSION_DESCRIPTION_START , yytext ); +} + +"</extension-description>" { + WorkOnTokenSet( DESC_EXTENSION_DESCRIPTION_END , yytext ); +} + +"<src "[^\>]*lang[^\>]*\> { + WorkOnTokenSet( DESC_EXTENSION_DESCRIPTION_SRC , yytext ); +} + + + +"<!--" { + int c1 = 0, c2 = 0; + int c3 = yyinput(); + char pChar[2]; + pChar[1] = 0x00; + pChar[0] = c3; + + WorkOnTokenSet( COMMENT, yytext ); + WorkOnTokenSet( COMMENT, pChar ); + + for(;;) { + if ( c3 == EOF ) + break; + if ( c1 == '-' && c2 == '-' && c3 == '>' ) + break; + c1 = c2; + c2 = c3; + c3 = yyinput(); + pChar[0] = c3; + WorkOnTokenSet( COMMENT, pChar ); + } +} + +.|\n { + if ( bText == 1 ) + WorkOnTokenSet( XML_TEXTCHAR, yytext ); + else + WorkOnTokenSet( UNKNOWNCHAR, yytext ); +} + + +%% + +/*****************************************************************************/ +int yywrap(void) +/*****************************************************************************/ +{ + return 1; +} + +/*****************************************************************************/ +void yyerror ( const char *s ) +/*****************************************************************************/ +{ + /* write error to stderr */ + fprintf( stderr, + "Error: \"%s\" in line %d: \"%s\"\n", s, yylineno, yytext ); + SetError(); +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { + /* error level */ + int nRetValue = 0; + FILE *pFile; + + if ( !GetOutputFile( 
argc, argv ) ) + { + return 1; + } + pFile = GetXrmFile(); + InitXrmExport( getFilename() ); + + if ( !pFile ) + return 1; + + yyin = pFile; + + /* create global instance of class XmlExport */ + //InitXrmExport( pOutput ); + + /* start parser */ + yylex(); + + /* get error info. and end export */ + nRetValue = GetError(); + EndXrmExport(); + + /* return error level */ + return nRetValue; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/l10ntools/source/xrmmerge.cxx b/l10ntools/source/xrmmerge.cxx new file mode 100644 index 0000000000..f69b039a44 --- /dev/null +++ b/l10ntools/source/xrmmerge.cxx @@ -0,0 +1,490 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <stdio.h> + +#include <common.hxx> +#include <export.hxx> +#include <po.hxx> +#include <utility> +#include <xrmlex.hxx> +#include <xrmmerge.hxx> +#include <tokens.h> +#include <helper.hxx> +#include <iostream> +#include <vector> +#include <memory> + +// set of global variables +static bool bMergeMode; +static bool bDisplayName; +static bool bExtensionDescription; +static OString sLanguage; +static OString sInputFileName; +static OString sOutputFile; +static OString sMergeSrc; +static OString sLangAttribute; +static OString sResourceType; +static XRMResParser *pParser = nullptr; + +extern "C" { +// the whole interface to lexer is in this extern "C" section + +extern bool GetOutputFile( int argc, char* argv[]) +{ + bDisplayName = false; + bExtensionDescription = false; + + common::HandledArgs aArgs; + if ( common::handleArguments(argc, argv, aArgs) ) + { + bMergeMode = aArgs.m_bMergeMode; + sLanguage = aArgs.m_sLanguage; + sInputFileName = aArgs.m_sInputFile; + sOutputFile = aArgs.m_sOutputFile; + sMergeSrc = aArgs.m_sMergeSrc; + return true; + } + else + { + // command line is not valid + common::writeUsage("xrmex"_ostr,"*.xrm/*.xml"_ostr); + return false; + } +} + +int InitXrmExport( const char* pFilename) +{ + // instantiate Export + OString sFilename( pFilename ); + + if ( bMergeMode ) + pParser = new XRMResMerge( sMergeSrc, sOutputFile, sFilename ); + else if (!sOutputFile.isEmpty()) + pParser = new XRMResExport( sOutputFile, sInputFileName ); + + return 1; +} + +int EndXrmExport() +{ + delete pParser; + return 1; +} +extern const char* getFilename() +{ + return sInputFileName.getStr(); +} + +extern FILE *GetXrmFile() +{ + // look for valid filename + if (!sInputFileName.isEmpty()) { + //TODO: explicit BOM handling? 
+ FILE * pFile = fopen(sInputFileName.getStr(), "r"); + if ( !pFile ){ + fprintf( stderr, "Error: Could not open file %s\n", + sInputFileName.getStr()); + } + else { + return pFile; + } + } + // this means the file could not be opened + return nullptr; +} + +int WorkOnTokenSet( int nTyp, char *pTokenText ) +{ + //printf("Typ = %d , text = '%s'\n",nTyp , pTokenText ); + pParser->Execute( nTyp, pTokenText ); + + return 1; +} + +int SetError() +{ + pParser->SetError(); + return 1; +} +} + +extern "C" { + +int GetError() +{ + return pParser->GetError(); +} +} + + + + +XRMResParser::XRMResParser() + : bError( false ), + bText( false ) +{ +} + +XRMResParser::~XRMResParser() +{ +} + +void XRMResParser::Execute( int nToken, char * pToken ) +{ + OString rToken( pToken ); + + switch ( nToken ) { + case XRM_TEXT_START:{ + OString sNewGID = GetAttribute( rToken, "id" ); + if ( sNewGID != sGID ) { + sGID = sNewGID; + } + bText = true; + sCurrentText = OString(); + sCurrentOpenTag = rToken; + Output( rToken ); + } + break; + + case XRM_TEXT_END: { + sCurrentCloseTag = rToken; + sResourceType = "readmeitem"_ostr; + sLangAttribute = "xml:lang"_ostr; + WorkOnText( sCurrentOpenTag, sCurrentText ); + Output( sCurrentText ); + EndOfText( sCurrentOpenTag, sCurrentCloseTag ); + bText = false; + rToken = OString(); + sCurrentText = OString(); + } + break; + + case DESC_DISPLAY_NAME_START:{ + bDisplayName = true; + } + break; + + case DESC_DISPLAY_NAME_END:{ + bDisplayName = false; + } + break; + + case DESC_TEXT_START:{ + if (bDisplayName) { + sGID = "dispname"_ostr; + bText = true; + sCurrentText = OString(); + sCurrentOpenTag = rToken; + Output( rToken ); + } + } + break; + + case DESC_TEXT_END: { + if (bDisplayName) { + sCurrentCloseTag = rToken; + sResourceType = "description"_ostr; + sLangAttribute = "lang"_ostr; + WorkOnText( sCurrentOpenTag, sCurrentText ); + Output( sCurrentText ); + EndOfText( sCurrentOpenTag, sCurrentCloseTag ); + bText = false; + rToken = OString(); + 
sCurrentText = OString(); + } + } + break; + + case DESC_EXTENSION_DESCRIPTION_START: { + bExtensionDescription = true; + } + break; + + case DESC_EXTENSION_DESCRIPTION_END: { + bExtensionDescription = false; + } + break; + + case DESC_EXTENSION_DESCRIPTION_SRC: { + if (bExtensionDescription) { + sGID = "extdesc"_ostr; + sResourceType = "description"_ostr; + sLangAttribute = "lang"_ostr; + sCurrentOpenTag = rToken; + sCurrentText = OString(); + Output( rToken ); + WorkOnDesc( sCurrentOpenTag, sCurrentText ); + sCurrentCloseTag = rToken; + Output( sCurrentText ); + rToken = OString(); + sCurrentText = OString(); + } + } + break; + + default: + if ( bText ) { + sCurrentText += rToken; + } + break; + } + + if ( !bText ) + { + Output( rToken ); + } +} + +OString XRMResParser::GetAttribute( const OString &rToken, std::string_view rAttribute ) +{ + const OString sSearch{ OString::Concat(" ") + rAttribute + "=" }; + OString sTmp{ rToken.replace('\t', ' ') }; + sal_Int32 nPos = sTmp.indexOf( sSearch ); + + if ( nPos<0 ) + return OString(); + + return sTmp.getToken(1, '"', nPos); +} + + +void XRMResParser::Error( const OString &rError ) +{ + yyerror(rError.getStr()); +} + + + + +XRMResExport::XRMResExport( + const OString &rOutputFile, OString _sFilePath ) + : sPath(std::move( _sFilePath )) +{ + pOutputStream.open( rOutputFile, PoOfstream::APP ); + if (!pOutputStream.isOpen()) + { + Error( "Unable to open output file: " + rOutputFile ); + } +} + +XRMResExport::~XRMResExport() +{ + pOutputStream.close(); +} + +void XRMResExport::Output( const OString& ) {} + +void XRMResExport::WorkOnDesc( + const OString &rOpenTag, + OString &rText ) +{ + const OString sDescFileName{ sInputFileName.replaceAll("description.xml"_ostr, OString()) + + GetAttribute( rOpenTag, "xlink:href" ) }; + std::ifstream file (sDescFileName.getStr(), std::ios::in|std::ios::binary|std::ios::ate); + if (file.is_open()) { + int size = static_cast<int>(file.tellg()); + std::unique_ptr<char[]> memblock(new char 
[size+1]); + file.seekg (0, std::ios::beg); + file.read (memblock.get(), size); + file.close(); + memblock[size] = '\0'; + rText = OString(memblock.get()); + } + WorkOnText( rOpenTag, rText ); + EndOfText( rOpenTag, rOpenTag ); +} + +void XRMResExport::WorkOnText( + const OString &rOpenTag, + OString &rText ) +{ + OString sLang( GetAttribute( rOpenTag, sLangAttribute )); + + if ( !pResData ) + { + pResData.reset( new ResData( GetGID() ) ); + } + pResData->sText[sLang] = rText; +} + +void XRMResExport::EndOfText( + const OString &, + const OString & ) +{ + if ( pResData ) + { + OString sAct = pResData->sText["en-US"_ostr]; + + if( !sAct.isEmpty() ) + common::writePoEntry( + "Xrmex"_ostr, pOutputStream, sPath, sResourceType, + pResData->sGId, OString(), OString(), sAct ); + } + pResData.reset(); +} + + + + +XRMResMerge::XRMResMerge( + const OString &rMergeSource, const OString &rOutputFile, + OString _sFilename ) + : sFilename(std::move( _sFilename )) +{ + if (!rMergeSource.isEmpty() && sLanguage.equalsIgnoreAsciiCase("ALL")) + { + pMergeDataFile.reset(new MergeDataFile( + rMergeSource, sInputFileName, false)); + aLanguages = pMergeDataFile->GetLanguages(); + } + else + aLanguages.push_back( sLanguage ); + pOutputStream.open( + rOutputFile.getStr(), std::ios_base::out | std::ios_base::trunc); + if (!pOutputStream.is_open()) { + Error( "Unable to open output file: " + rOutputFile ); + } +} + +XRMResMerge::~XRMResMerge() +{ + pOutputStream.close(); +} + +void XRMResMerge::WorkOnDesc( + const OString &rOpenTag, + OString &rText ) +{ + WorkOnText( rOpenTag, rText); + if ( pMergeDataFile && pResData ) { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrys( pResData.get() ); + if ( pEntrys ) { + OString sCur; + OString sDescFilename = GetAttribute ( rOpenTag, "xlink:href" ); + for( size_t n = 0; n < aLanguages.size(); n++ ){ + sCur = aLanguages[ n ]; + OString sText; + if ( !sCur.equalsIgnoreAsciiCase("en-US") && + ( pEntrys->GetText( sText, sCur, true )) && + 
!sText.isEmpty()) + { + OString sAdditionalLine{ "\n " + rOpenTag }; + OString sSearch{ sLangAttribute + "=\"" }; + OString sReplace( sSearch ); + + sSearch += GetAttribute( rOpenTag, sLangAttribute ); + sReplace += sCur; + sAdditionalLine = sAdditionalLine.replaceFirst( + sSearch, sReplace); + + sSearch = "xlink:href=\""_ostr; + sReplace = sSearch; + + const OString sLocDescFilename = sDescFilename.replaceFirst( "en-US"_ostr, sCur); + + sSearch += sDescFilename; + sReplace += sLocDescFilename; + sAdditionalLine = sAdditionalLine.replaceFirst( + sSearch, sReplace); + + Output( sAdditionalLine ); + + sal_Int32 i = sOutputFile.lastIndexOf('/'); + if (i == -1) { + std::cerr + << "Error: output file " << sOutputFile + << " does not contain any /\n"; + throw false; //TODO + } + OString sOutputDescFile( + sOutputFile.subView(0, i + 1) + sLocDescFilename); + std::ofstream file(sOutputDescFile.getStr()); + if (file.is_open()) { + file << sText; + file.close(); + } else { + std::cerr + << "Error: cannot write " + << sOutputDescFile << '\n'; + throw false; //TODO + } + } + } + } + } + pResData.reset(); +} + +void XRMResMerge::WorkOnText( + const OString &, + OString & ) +{ + if ( pMergeDataFile && !pResData ) { + pResData.reset( new ResData( GetGID(), sFilename ) ); + pResData->sResTyp = sResourceType; + } +} + +void XRMResMerge::Output( const OString& rOutput ) +{ + if (!rOutput.isEmpty()) + pOutputStream << rOutput; +} + +void XRMResMerge::EndOfText( + const OString &rOpenTag, + const OString &rCloseTag ) +{ + + Output( rCloseTag ); + if ( pMergeDataFile && pResData ) { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrys( pResData.get() ); + if ( pEntrys ) { + OString sCur; + for( size_t n = 0; n < aLanguages.size(); n++ ){ + sCur = aLanguages[ n ]; + OString sContent; + if (!sCur.equalsIgnoreAsciiCase("en-US") && + ( pEntrys->GetText( sContent, sCur, true )) && + !sContent.isEmpty() && + helper::isWellFormedXML( sContent )) + { + const OString& sText( sContent ); + 
OString sAdditionalLine{ "\n " + rOpenTag }; + OString sSearch{ sLangAttribute + "=\"" }; + OString sReplace( sSearch ); + + sSearch += GetAttribute( rOpenTag, sLangAttribute ); + sReplace += sCur; + + sAdditionalLine = sAdditionalLine.replaceFirst( + sSearch, sReplace) + sText + rCloseTag; + + Output( sAdditionalLine ); + } + } + } + } + pResData.reset(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |