diff options
Diffstat (limited to 'l10ntools/source')
24 files changed, 6371 insertions, 0 deletions
diff --git a/l10ntools/source/cfg_yy_wrapper.c b/l10ntools/source/cfg_yy_wrapper.c new file mode 100644 index 000000000..485ce0ab6 --- /dev/null +++ b/l10ntools/source/cfg_yy_wrapper.c @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +// Helper to suppress warnings in lex generated c code, see #i57362# +#include "cfg_yy.c" + +/* Storing the addresses of yyunput and yy_flex_strlen in globals counts as a use of both functions. */ +void (*avoid_unused_yyunput_in_cfg_yy_c)() = yyunput; +int (*avoid_unused_yy_flex_strlen_in_cfg_yy_c)() = yy_flex_strlen; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/cfglex.l b/l10ntools/source/cfglex.l new file mode 100644 index 000000000..1bac5a32d --- /dev/null +++ b/l10ntools/source/cfglex.l @@ -0,0 +1,165 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +%{ + +/* + * lexer for parsing cfg source files + */ + +#include <sal/config.h> + +/* enlarge token buffer to tokenize whole strings */ +#undef YYLMAX +#define YYLMAX 64000 + +/* to enable debug output define LEXDEBUG */ +#define LEXDEBUG 1 +#ifdef LEXDEBUG +#define OUTPUT fprintf +#else +#define OUTPUT(Par1,Par2); +#endif + +/* table of possible token ids */ +#include <tokens.h> +#include <stdlib.h> +#include <stdio.h> + +#include <sal/main.h> + +#include <cfglex.hxx> + +#define YY_NO_UNISTD_H + +static int yycolumn = 1; +#define YY_USER_ACTION yycolumn += yyleng; + +/* bText is set to 1 by the CFG_TEXT_START rule and to 0 by the other tag rules (the comment rule leaves it alone); the catch-all rule uses it to choose CFG_TEXTCHAR over UNKNOWNCHAR. */ +static int bText=0; +%} + +%option yylineno +%option nounput +%option never-interactive + +%p 24000 +%e 1200 +%n 500 + +%% + +\<[^\>]*"xml:lang="\""x-no-translate"\"[^\<]*\/\> { + bText = 0; + workOnTokenSet( CFG_TOKEN_NO_TRANSLATE, yytext ); +} + +\<.*\/\> { + bText = 0; + workOnTokenSet( ANYTOKEN, yytext ); +} + +\<[^\>]*"xml:lang="\".*\"[^\<]*\> { + bText = 1; + workOnTokenSet( CFG_TEXT_START, yytext ); +} + + +\<[^\/\!][^\>]*\> { + bText = 0; + workOnTokenSet( CFG_TAG, yytext ); +} + +"<!"DOCTYPE[^\>]*\> { + bText = 0; + workOnTokenSet( CFG_TAG, yytext ); +} + + +\<\!\-\- { + char c1 = 0, c2 = 0; + int c3 = yyinput(); + char pChar[2]; + pChar[1] = 0x00; + pChar[0] = c3; + + workOnTokenSet( COMMENT, yytext ); + workOnTokenSet( COMMENT, pChar ); + + for(;;) { + if ( c3 == EOF ) + break; + if ( c1 == '-' && c2 == '-' && c3 == '>' ) + break; + c1 = c2; 
+ c2 = c3; + c3 = yyinput(); + + pChar[0] = c3; + workOnTokenSet( COMMENT, pChar ); + } +} + +\<\/[^\>]*\> { + bText = 0; + workOnTokenSet( CFG_CLOSETAG, yytext ); +} + +\<[^\>\!]*\> { + bText = 0; + if ( yytext[ 1 ] == '!' && yytext[ 2 ] == '-' && yytext[ 3 ] == '-' ) + workOnTokenSet( COMMENT, yytext ); + else + workOnTokenSet( CFG_UNKNOWNTAG, yytext ); +} + +.|\n { + /* NOTE(review): yycolumn is reset to 1 for every character matched by this rule, not only for newlines - confirm that this is intended. */ + yycolumn = 1; + if ( bText == 1 ) + workOnTokenSet( CFG_TEXTCHAR, yytext ); + else + workOnTokenSet( UNKNOWNCHAR, yytext ); +} + + +%% + +/*****************************************************************************/ +int yywrap(void) +/*****************************************************************************/ +{ + return 1; +} + +/*****************************************************************************/ +void yyerror ( const char *s ) +/*****************************************************************************/ +{ + /* write error to stderr */ + fprintf( stderr, + "Error: \"%s\" in line %d, column %d: \"%s\"\n", s, yylineno, yycolumn, yytext ); + exit(EXIT_FAILURE); +} + +/* Entry point: init() parses the arguments and supplies the input stream, then yylex() is called once on it. */ +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { + yyin = init(argc, argv); + yylex(); + return EXIT_SUCCESS; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/l10ntools/source/cfgmerge.cxx b/l10ntools/source/cfgmerge.cxx new file mode 100644 index 000000000..e02e497d5 --- /dev/null +++ b/l10ntools/source/cfgmerge.cxx @@ -0,0 +1,485 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cfglex.hxx> +#include <common.hxx> + +#include <cstdio> +#include <cstdlib> +#include <cstring> + +#include <memory> +#include <rtl/strbuf.hxx> + +#include <helper.hxx> +#include <export.hxx> +#include <cfgmerge.hxx> +#include <tokens.h> + +namespace { + +namespace global { + +OString inputPathname; +std::unique_ptr< CfgParser > parser; + +} +} + +extern "C" { + +/* Parses the command line, opens the input file for reading and creates the global parser (CfgMerge in merge mode, CfgExport otherwise). Exits with EXIT_FAILURE on invalid arguments or if the file cannot be opened. Returns the opened FILE. */ +FILE * init(int argc, char ** argv) { + + common::HandledArgs aArgs; + if ( !common::handleArguments(argc, argv, aArgs) ) + { + common::writeUsage("cfgex","*.xcu"); + std::exit(EXIT_FAILURE); + } + global::inputPathname = aArgs.m_sInputFile; + + FILE * pFile = std::fopen(global::inputPathname.getStr(), "r"); + if (pFile == nullptr) { + std::fprintf( + stderr, "Error: Cannot open file \"%s\"\n", + global::inputPathname.getStr() ); + std::exit(EXIT_FAILURE); + } + + if (aArgs.m_bMergeMode) { + global::parser.reset( + new CfgMerge( + aArgs.m_sMergeSrc, aArgs.m_sOutputFile, + global::inputPathname, aArgs.m_sLanguage )); + } else { + global::parser.reset( + new CfgExport( + aArgs.m_sOutputFile, global::inputPathname )); + } + + return pFile; +} + +/* Forwards one token to the global parser created by init(). */ +void workOnTokenSet(int nTyp, char * pTokenText) { + global::parser->Execute( nTyp, pTokenText ); +} + +} + + + + +/* Allocates a CfgStackData with new and appends the raw pointer to maList. NOTE(review): the destructor below is empty, so entries still on the stack are not freed here - confirm that they are released elsewhere (presumably in Pop(), which is not visible in this file). */ +CfgStackData* CfgStack::Push(const OString &rTag, const OString &rId) +{ + CfgStackData *pD = new CfgStackData( rTag, rId ); + maList.push_back( pD ); + return pD; +} + + + + +CfgStack::~CfgStack() +{ +} + +/* Joins the identifiers of the stack entries 0..nPos (inclusive) with a dot. */ +OString CfgStack::GetAccessPath( size_t nPos ) +{ + OStringBuffer sReturn; + for (size_t i = 0; i <= nPos; ++i) + { + if (i) + sReturn.append('.'); 
+ sReturn.append(maList[i]->GetIdentifier()); + } + + return sReturn.makeStringAndClear(); +} + +/* Returns the last element of maList, or nullptr if the stack is empty. */ +CfgStackData *CfgStack::GetStackData() +{ + if (!maList.empty()) + return maList[maList.size() - 1]; + else + return nullptr; +} + + + + +CfgParser::CfgParser() + : pStackData( nullptr ), + bLocalize( false ) +{ +} + +CfgParser::~CfgParser() +{ +} + +/* True if the second-to-last character of the token is a slash, i.e. the tag is self-closing. NOTE(review): indexes getLength() - 2 without checking the length. */ +bool CfgParser::IsTokenClosed(const OString &rToken) +{ + return rToken[rToken.getLength() - 2] == '/'; +} + +/* Removes newline, carriage return and tab characters from rText, hands it to WorkOnText and stores the result for rIsoLang on the current stack entry. */ +void CfgParser::AddText( + OString &rText, + const OString &rIsoLang, + const OString &rResTyp ) +{ + rText = rText.replaceAll(OString('\n'), OString()). + replaceAll(OString('\r'), OString()). + replaceAll(OString('\t'), OString()); + pStackData->sResTyp = rResTyp; + WorkOnText( rText, rIsoLang ); + pStackData->sText[ rIsoLang ] = rText; +} + +#if defined _MSC_VER +#pragma warning(disable: 4702) // unreachable code, bug in MSVC2015, it thinks the std::exit is unreachable +#endif +/* Token state machine: opening tags that are not self-closing push a CfgStackData, close tags pop it (a close tag that does not match the top of the stack aborts via yyerror), CFG_TEXTCHAR tokens accumulate in sCurrentText, and accumulated text is flushed through AddText and Output as soon as any other token arrives. */ +void CfgParser::ExecuteAnalyzedToken( int nToken, char *pToken ) +{ + OString sToken( pToken ); + + if ( sToken == " " || sToken == "\t" ) + sLastWhitespace += sToken; + + OString sTokenName; + OString sTokenId; + + bool bOutput = true; + + switch ( nToken ) { + case CFG_TOKEN_PACKAGE: + case CFG_TOKEN_COMPONENT: + case CFG_TOKEN_TEMPLATE: + case CFG_TOKEN_CONFIGNAME: + case CFG_TOKEN_OORNAME: + case CFG_TOKEN_OORVALUE: + case CFG_TAG: + case ANYTOKEN: + case CFG_TEXT_START: + { + sTokenName = sToken.getToken(1, '<').getToken(0, '>'). 
+ getToken(0, ' '); + + if ( !IsTokenClosed( sToken )) { + OString sSearch; + switch ( nToken ) { + case CFG_TOKEN_PACKAGE: + sSearch = "package-id="; + break; + case CFG_TOKEN_COMPONENT: + sSearch = "component-id="; + break; + case CFG_TOKEN_TEMPLATE: + sSearch = "template-id="; + break; + case CFG_TOKEN_CONFIGNAME: + sSearch = "cfg:name="; + break; + case CFG_TOKEN_OORNAME: + sSearch = "oor:name="; + bLocalize = true; + break; + case CFG_TOKEN_OORVALUE: + sSearch = "oor:value="; + break; + case CFG_TEXT_START: { + if ( sCurrentResTyp != sTokenName ) { + WorkOnResourceEnd(); + } + sCurrentResTyp = sTokenName; + + OString sTemp = sToken.copy( sToken.indexOf( "xml:lang=" )); + sCurrentIsoLang = sTemp.getToken(1, '"'); + + if ( sCurrentIsoLang == NO_TRANSLATE_ISO ) + bLocalize = false; + + pStackData->sTextTag = sToken; + + sCurrentText = ""; + } + break; + } + if ( !sSearch.isEmpty()) + { + OString sTemp = sToken.copy( sToken.indexOf( sSearch )); + sTokenId = sTemp.getToken(1, '"'); + } + pStackData = aStack.Push( sTokenName, sTokenId ); + + if ( sSearch == "cfg:name=" ) { + OString sTemp( sToken.toAsciiUpperCase() ); + bLocalize = sTemp.indexOf("CFG:TYPE=\"STRING\"")>=0 + && sTemp.indexOf( "CFG:LOCALIZED=\"TRUE\"" )>=0; + } + } + else if ( sTokenName == "label" ) { + if ( sCurrentResTyp != sTokenName ) { + WorkOnResourceEnd(); + } + sCurrentResTyp = sTokenName; + } + } + break; + case CFG_CLOSETAG: + { + sTokenName = sToken.getToken(1, '/').getToken(0, '>'). 
+ getToken(0, ' '); + if ( aStack.GetStackData() && ( aStack.GetStackData()->GetTagType() == sTokenName )) + { + if (sCurrentText.isEmpty()) + WorkOnResourceEnd(); + aStack.Pop(); + pStackData = aStack.GetStackData(); + } + else + { + const OString sError{ "Misplaced close tag: " + sToken + " in file " + global::inputPathname }; + yyerror(sError.getStr()); + std::exit(EXIT_FAILURE); + } + } + break; + + case CFG_TEXTCHAR: + sCurrentText += sToken; + bOutput = false; + break; + + case CFG_TOKEN_NO_TRANSLATE: + bLocalize = false; + break; + } + + if ( !sCurrentText.isEmpty() && nToken != CFG_TEXTCHAR ) + { + AddText( sCurrentText, sCurrentIsoLang, sCurrentResTyp ); + Output( sCurrentText ); + sCurrentText.clear(); + pStackData->sEndTextTag = sToken; + } + + if ( bOutput ) + Output( sToken ); + + if ( sToken != " " && sToken != "\t" ) + sLastWhitespace = ""; +} + +void CfgExport::Output(const OString&) +{ +} + +/* Refines a CFG_TAG token according to the first id attribute it contains and passes it to ExecuteAnalyzedToken; every other token type is passed through unchanged. NOTE(review): a tag containing cfg:name= is dispatched as CFG_TOKEN_OORNAME, so the CFG_TOKEN_CONFIGNAME handling in ExecuteAnalyzedToken is never reached from here - confirm that this is intended. */ +void CfgParser::Execute( int nToken, char * pToken ) +{ + OString sToken( pToken ); + + switch ( nToken ) { + case CFG_TAG: + if ( sToken.indexOf( "package-id=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_PACKAGE, pToken ); + return; + } else if ( sToken.indexOf( "component-id=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_COMPONENT, pToken ); + return; + } else if ( sToken.indexOf( "template-id=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_TEMPLATE, pToken ); + return; + } else if ( sToken.indexOf( "cfg:name=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_OORNAME, pToken ); + return; + } else if ( sToken.indexOf( "oor:name=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_OORNAME, pToken ); + return; + } else if ( sToken.indexOf( "oor:value=" ) != -1 ) { + ExecuteAnalyzedToken( CFG_TOKEN_OORVALUE, pToken ); + return; + } + break; + } + ExecuteAnalyzedToken( nToken, pToken ); +} + + + + +/* Opens rOutputFile with PoOfstream::APP and terminates the process with EXIT_FAILURE if that fails. */ +CfgExport::CfgExport( + const OString &rOutputFile, + const OString &rFilePath ) + + : sPath( rFilePath ) +{ + pOutputStream.open( rOutputFile, PoOfstream::APP ); + if 
+ (!pOutputStream.isOpen()) + { + std::cerr << "ERROR: Unable to open output file: " << rOutputFile << "\n"; + std::exit(EXIT_FAILURE); + } +} + +CfgExport::~CfgExport() +{ + pOutputStream.close(); +} + + +/* Writes one PO entry for the en-US text of the current stack entry; does nothing unless bLocalize is set and that text is non-empty. With a single stack entry its identifier is used as group id, otherwise the access path of the enclosing entries is. */ +void CfgExport::WorkOnResourceEnd() +{ + if ( !bLocalize ) + return; + + if ( pStackData->sText["en-US"].isEmpty() ) + return; + + OString sXComment = pStackData->sText[OString("x-comment")]; + OString sLocalId = pStackData->sIdentifier; + OString sGroupId; + if ( aStack.size() == 1 ) { + sGroupId = sLocalId; + sLocalId = ""; + } + else { + sGroupId = aStack.GetAccessPath( aStack.size() - 2 ); + } + + + OString sText = pStackData->sText[ "en-US" ]; + sText = helper::UnQuotHTML( sText ); + + common::writePoEntry( + "Cfgex", pOutputStream, sPath, pStackData->sResTyp, + sGroupId, sLocalId, sXComment, sText); +} + +void CfgExport::WorkOnText( + OString &rText, + const OString &rIsoLang +) +{ + if( !rIsoLang.isEmpty() ) rText = helper::UnQuotHTML( rText ); +} + + + + +/* Opens the output file (truncating it), exits with EXIT_FAILURE if that fails, and determines the languages to merge: all languages of the merge data when rLanguage is ALL (case-insensitive) and a merge source was given, otherwise just rLanguage. */ +CfgMerge::CfgMerge( + const OString &rMergeSource, const OString &rOutputFile, + const OString &rFilename, const OString &rLanguage ) + : sFilename( rFilename ), + bEnglish( false ) +{ + pOutputStream.open( + rOutputFile.getStr(), std::ios_base::out | std::ios_base::trunc); + if (!pOutputStream.is_open()) + { + std::cerr << "ERROR: Unable to open output file: " << rOutputFile << "\n"; + std::exit(EXIT_FAILURE); + } + + if (!rMergeSource.isEmpty()) + { + pMergeDataFile.reset(new MergeDataFile( + rMergeSource, global::inputPathname, true )); + if (rLanguage.equalsIgnoreAsciiCase("ALL") ) + { + aLanguages = pMergeDataFile->GetLanguages(); + } + else aLanguages.push_back(rLanguage); + } + else + aLanguages.push_back(rLanguage); +} + +CfgMerge::~CfgMerge() +{ + pOutputStream.close(); +} + +/* Creates pResData for the current stack position on first use and records in bEnglish that an en-US text was seen; the text itself is left untouched. */ +void CfgMerge::WorkOnText(OString &, const OString& rLangIndex) +{ + if ( !(pMergeDataFile && bLocalize) ) + return; + + if ( !pResData ) { + OString sLocalId = pStackData->sIdentifier; + OString sGroupId; + if ( 
+ aStack.size() == 1 ) { + sGroupId = sLocalId; + sLocalId.clear(); + } + else { + sGroupId = aStack.GetAccessPath( aStack.size() - 2 ); + } + + pResData.reset( new ResData( sGroupId, sFilename ) ); + pResData->sId = sLocalId; + pResData->sResTyp = pStackData->sResTyp; + } + + if (rLangIndex.equalsIgnoreAsciiCase("en-US")) + bEnglish = true; +} + +void CfgMerge::Output(const OString& rOutput) +{ + pOutputStream << rOutput; +} + +/* For every language in aLanguages other than en-US that has a non-empty translation, outputs a copy of the stored text tag with its xml:lang value replaced, followed by the quoted translation and the stored end tag. Always resets pResData and bEnglish afterwards. */ +void CfgMerge::WorkOnResourceEnd() +{ + + if ( pMergeDataFile && pResData && bLocalize && bEnglish ) { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrysCaseSensitive( pResData.get() ); + if ( pEntrys ) { + OString sCur; + + for( size_t i = 0; i < aLanguages.size(); ++i ){ + sCur = aLanguages[ i ]; + + OString sContent; + pEntrys->GetText( sContent, sCur, true ); + if ( + ( !sCur.equalsIgnoreAsciiCase("en-US") ) && !sContent.isEmpty()) + { + OString sTextTag = pStackData->sTextTag; + const sal_Int32 nLangAttributeStart{ sTextTag.indexOf( "xml:lang=" ) }; + const sal_Int32 nLangStart{ sTextTag.indexOf( '"', nLangAttributeStart )+1 }; + const sal_Int32 nLangEnd{ sTextTag.indexOf( '"', nLangStart ) }; + OString sAdditionalLine{ "\t" + + sTextTag.replaceAt(nLangStart, nLangEnd-nLangStart, sCur) + + helper::QuotHTML(sContent) + + pStackData->sEndTextTag + + "\n" + + sLastWhitespace }; + Output( sAdditionalLine ); + } + } + } + } + pResData.reset(); + bEnglish = false; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/common.cxx b/l10ntools/source/common.cxx new file mode 100644 index 000000000..84adb767c --- /dev/null +++ b/l10ntools/source/common.cxx @@ -0,0 +1,159 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <common.hxx> + +namespace { + +//flags for handleArguments() +enum class State { + NONE, Input, Output, MergeSrc, Languages +}; + +} + +namespace common { + +/* Parses the options -i, -o, -m, -l (each followed by a value) and -b; option letters are matched case-insensitively and -m also switches on merge mode. Returns true only if both an input and an output file were given; in that failure case o_aHandledArgs is reset to its default state. A value that is not preceded by an option makes the function return false immediately. */ +bool handleArguments( + int argc, char * argv[], HandledArgs& o_aHandledArgs) +{ + o_aHandledArgs = HandledArgs(); + State nState = State::NONE; + + for( int i = 1; i < argc; i++ ) + { + if ( OString( argv[ i ] ).toAsciiUpperCase() == "-I" ) + { + nState = State::Input; // next token specifies source file + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-O" ) + { + nState = State::Output; // next token specifies the dest file + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-M" ) + { + nState = State::MergeSrc; // next token specifies the merge database + o_aHandledArgs.m_bMergeMode = true; + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-L" ) + { + nState = State::Languages; + } + else if ( OString( argv[ i ] ).toAsciiUpperCase() == "-B" ) + { + o_aHandledArgs.m_bUTF8BOM = true; + } + else + { + switch ( nState ) + { + case State::NONE: + { + return false; // no valid command line + } + case State::Input: + { + o_aHandledArgs.m_sInputFile = OString( argv[i] ); + } + break; + case State::Output: + { + o_aHandledArgs.m_sOutputFile = OString( argv[i] ); + } + break; + case State::MergeSrc: + { + o_aHandledArgs.m_sMergeSrc = OString( argv[i] ); + } + break; + case State::Languages: + { + o_aHandledArgs.m_sLanguage = OString( argv[i] ); + } + break; + } + } + } + if( !o_aHandledArgs.m_sInputFile.isEmpty() && + !o_aHandledArgs.m_sOutputFile.isEmpty() ) + { + return true; + } + else + { + o_aHandledArgs = HandledArgs(); + return false; + } +} + +/* Prints the common command line syntax to std::cout, using rName as the tool name and rFileType as the description of the accepted input files. */ +void writeUsage(const OString& rName, const OString& rFileType) +{ + std::cout + << " Syntax: " << rName + << " -i FileIn -o FileOut [-m DataBase] [-l Lang] [-b]\n" + " FileIn: Source files (" << rFileType << ")\n" 
+ " FileOut: Destination file (*.*)\n" + " DataBase: Mergedata (*.po)\n" + " Lang: Restrict the handled language; one element of\n" + " (de, en-US, ...) or all\n" + " -b: Add UTF-8 Byte Order Mark to FileOut(use with -m option)\n"; +} + +/* Builds a PoEntry from the given attributes and writes it to rPoStream. A PoEntry::Exception is not propagated: it is reported on std::cerr as a warning prefixed with rExecutable, together with whichever ids and text help to locate the offending string. */ +void writePoEntry( + const OString& rExecutable, PoOfstream& rPoStream, const OString& rSourceFile, + const OString& rResType, const OString& rGroupId, const OString& rLocalId, + const OString& rHelpText, const OString& rText, const PoEntry::TYPE eType ) +{ + try + { + PoEntry aPO(rSourceFile, rResType, rGroupId, rLocalId, rHelpText, rText, eType); + rPoStream.writeEntry( aPO ); + } + catch( PoEntry::Exception& aException ) + { + if(aException == PoEntry::NOSOURCFILE) + { + std::cerr << rExecutable << " warning: no sourcefile specified for po entry\n"; + } + else + { + std::cerr << rExecutable << " warning: invalid po attributes extracted from " << rSourceFile << "\n"; + if(aException == PoEntry::NOGROUPID) + { + std::cerr << "No groupID specified!\n"; + std::cerr << "String: " << rText << "\n"; + } + else if (aException == PoEntry::NOSTRING) + { + std::cerr << "No string specified!\n"; + std::cerr << "GroupID: " << rGroupId << "\n"; + if( !rLocalId.isEmpty() ) std::cerr << "LocalID: " << rLocalId << "\n"; + } + else + { + if (aException == PoEntry::NORESTYPE) + { + std::cerr << "No resource type specified!\n"; + } + else if (aException == PoEntry::WRONGHELPTEXT) + { + std::cerr << "x-comment length is 5 characters:" << rHelpText << "\n"; + } + + std::cerr << "GroupID: " << rGroupId << "\n"; + if( !rLocalId.isEmpty() ) std::cerr << "LocalID: " << rLocalId << "\n"; + std::cerr << "String: " << rText << "\n"; + } + } + } +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/helper.cxx b/l10ntools/source/helper.cxx new file mode 100644 index 000000000..b842755c2 --- /dev/null +++ b/l10ntools/source/helper.cxx @@ -0,0 +1,147 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; 
c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <helper.hxx> + +namespace helper { + +OString escapeAll( + const OString& rText, const OString& rUnEscaped, const OString& rEscaped ) +{ + assert( rEscaped.getLength() == 2*rUnEscaped.getLength() ); + OStringBuffer sReturn; + for ( sal_Int32 nIndex = 0; nIndex < rText.getLength(); ++nIndex ) + { + sal_Int32 nUnEscapedOne = rUnEscaped.indexOf(rText[nIndex]); + if( nUnEscapedOne != -1 ) + { + sReturn.append(rEscaped.copy(nUnEscapedOne*2,2)); + } + else + sReturn.append(rText[nIndex]); + } + return sReturn.makeStringAndClear(); +} + + +OString unEscapeAll( + const OString& rText, const OString& rEscaped, const OString& rUnEscaped) +{ + assert( rEscaped.getLength() == 2*rUnEscaped.getLength() ); + OStringBuffer sReturn; + const sal_Int32 nLength = rText.getLength(); + for ( sal_Int32 nIndex = 0; nIndex < nLength; ++nIndex ) + { + if( rText[nIndex] == '\\' && nIndex+1 < nLength ) + { + sal_Int32 nEscapedOne = rEscaped.indexOf(rText.copy(nIndex,2)); + if( nEscapedOne != -1 ) + { + sReturn.append(rUnEscaped[nEscapedOne/2]); + ++nIndex; + } + else + { + sReturn.append(rText[nIndex]); + } + } + else + sReturn.append(rText[nIndex]); + } + return sReturn.makeStringAndClear(); +} + + +OString QuotHTML(const OString &rString) +{ + OStringBuffer sReturn; + for (sal_Int32 i = 0; i < rString.getLength(); ++i) + { + switch (rString[i]) + { + case '<': + sReturn.append("<"); + break; + case '>': + sReturn.append(">"); + break; + case '"': + sReturn.append("""); + break; + case '\'': + sReturn.append("'"); + break; + case '&': + if (rString.match("&", i)) + sReturn.append('&'); + else + sReturn.append("&"); + break; + default: + sReturn.append(rString[i]); + break; + } + } + return 
sReturn.makeStringAndClear(); +} + +OString UnQuotHTML( const OString& rString ) +{ + OStringBuffer sReturn; + for (sal_Int32 i = 0; i != rString.getLength();) { + if (rString.match("&", i)) { + sReturn.append('&'); + i += RTL_CONSTASCII_LENGTH("&"); + } else if (rString.match("<", i)) { + sReturn.append('<'); + i += RTL_CONSTASCII_LENGTH("<"); + } else if (rString.match(">", i)) { + sReturn.append('>'); + i += RTL_CONSTASCII_LENGTH(">"); + } else if (rString.match(""", i)) { + sReturn.append('"'); + i += RTL_CONSTASCII_LENGTH("""); + } else if (rString.match("'", i)) { + sReturn.append('\''); + i += RTL_CONSTASCII_LENGTH("'"); + } else { + sReturn.append(rString[i]); + ++i; + } + } + return sReturn.makeStringAndClear(); +} + +bool isWellFormedXML( OString const & text ) +{ + xmlDocPtr doc; + bool result = true; + + OString content = "<root>" + text + "</root>"; + doc = xmlParseMemory(content.getStr(),static_cast<int>(content.getLength())); + if (doc == nullptr) { + result = false; + } + xmlFreeDoc(doc); + xmlCleanupParser(); + return result; +} + +//Convert xmlChar* to OString +OString xmlStrToOString( const xmlChar* pString ) +{ + xmlChar* pTemp = xmlStrdup( pString ); + OString sResult = reinterpret_cast<char*>( pTemp ); + xmlFree( pTemp ); + return sResult; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/helpex.cxx b/l10ntools/source/helpex.cxx new file mode 100644 index 000000000..3bfacb823 --- /dev/null +++ b/l10ntools/source/helpex.cxx @@ -0,0 +1,143 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <iostream> +#include <fstream> +#include <string> +#include <cstring> + +#include <sal/main.h> + +#include <helpmerge.hxx> +#include <common.hxx> +#include <memory> + +#ifndef TESTDRIVER + +static void WriteUsage() +{ + std::cout + << (" Syntax: Helpex -[m]i FileIn -o FileOut [-m DataBase] [-l Lang]\n" + " FileIn + i: Source file (*.xhp)\n" + " FileIn + -mi: File including paths of source files" + " (only for merge)\n" + " FileOut: Destination file (*.*) or files (in case of -mi)\n" + " DataBase: Mergedata (*.po)\n" + " Lang: Restrict the handled languages; one element of\n" + " (de, en-US, ...) 
or all\n"); +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + bool hasNoError = true; + try + { + bool bMultiMerge = false; + for (int nIndex = 1; nIndex != argc; ++nIndex) + { + if (std::strcmp(argv[nIndex], "-mi") == 0) + { + argv[nIndex][1] = 'i'; + argv[nIndex][2] = '\0'; + bMultiMerge = true; + break; + } + } + + common::HandledArgs aArgs; + if ( !common::handleArguments( argc, argv, aArgs) ) + { + WriteUsage(); + return 1; + } + + if ( aArgs.m_bMergeMode ) + { + if( bMultiMerge ) + { + std::ifstream aInput( aArgs.m_sInputFile.getStr() ); + if( !aInput.is_open() ) + { + std::cerr << "Helpex error: cannot open input file\n"; + return 1; + } + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( aArgs.m_sLanguage != "qtz") + { + pMergeDataFile.reset(new MergeDataFile(aArgs.m_sMergeSrc, OString(), false, false )); + } + std::string sTemp; + aInput >> sTemp; + while( !aInput.eof() ) + { + // coverity[tainted_data] - this is a build time tool + const OString sXhpFile( sTemp.data(), static_cast<sal_Int32>(sTemp.length()) ); + HelpParser aParser( sXhpFile ); + const OString sOutput( + aArgs.m_sOutputFile + + sXhpFile.copy( sXhpFile.lastIndexOf('/') )); + if( !aParser.Merge( sOutput, + aArgs.m_sLanguage, pMergeDataFile.get() )) + { + hasNoError = false; + } + aInput >> sTemp; + } + aInput.close(); + } + else + { + HelpParser aParser( aArgs.m_sInputFile ); + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( aArgs.m_sLanguage != "qtz") + { + pMergeDataFile.reset(new MergeDataFile(aArgs.m_sMergeSrc, aArgs.m_sInputFile, false, false )); + } + hasNoError = + aParser.Merge( + aArgs.m_sOutputFile, + aArgs.m_sLanguage, pMergeDataFile.get() ); + } + } + else + { + HelpParser aParser( aArgs.m_sInputFile ); + std::unique_ptr<XMLFile> xmlfile(new XMLFile( OString('0') )); + hasNoError = + HelpParser::CreatePO( + aArgs.m_sOutputFile, aArgs.m_sInputFile, + xmlfile.get(), "help" ); + } + } + catch (std::exception& e) + { + std::cerr << "Helpex exception: " << e.what() << 
std::endl; + hasNoError = true; + } + + if( hasNoError ) + return 0; + else + return 1; +} +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/helpmerge.cxx b/l10ntools/source/helpmerge.cxx new file mode 100644 index 000000000..abab8a5a0 --- /dev/null +++ b/l10ntools/source/helpmerge.cxx @@ -0,0 +1,260 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <fstream> +#include <functional> +#include <memory> + +#include <osl/file.hxx> +#include <sal/log.hxx> + +#include <stdio.h> +#include <stdlib.h> +#include <helpmerge.hxx> +#include <algorithm> +#include <sys/types.h> +#include <sys/stat.h> +#include <iostream> +#include <vector> +#include <rtl/strbuf.hxx> +#ifdef _WIN32 +#if !defined WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#undef CopyFile +#include <direct.h> +#endif + +#include <export.hxx> +#include <common.hxx> +#include <helper.hxx> +#include <po.hxx> + +#if OSL_DEBUG_LEVEL > 2 +void HelpParser::Dump(XMLHashMap* rElem_in) +{ + for (auto const& pos : *rElem_in) + { + Dump(pos.second,pos.first); + } +} + +void HelpParser::Dump(LangHashMap* rElem_in,const OString & sKey_in) +{ + OString x; + OString y; + fprintf(stdout,"+------------%s-----------+\n",sKey_in.getStr() ); + for (auto const& posn : *rElem_in) + { + x=posn.first; + y=posn.second->ToOString(); + fprintf(stdout,"key=%s value=%s\n",x.getStr(),y.getStr()); + } + fprintf(stdout,"+--------------------------+\n"); +} +#endif + +HelpParser::HelpParser( const OString &rHelpFile ) + : sHelpFile( rHelpFile ) + {}; + +/* Parses sHelpFile into pXmlFile and writes one PO entry per string that has an en-US element to rPOFile_in (opened with PoOfstream::APP). Terminates the process if parsing fails, returns true without writing anything if CheckExportStatus() is false, and returns false only if the PO file cannot be opened. */ +/*****************************************************************************/ +bool HelpParser::CreatePO( +/*****************************************************************************/ + const OString &rPOFile_in, const OString &sHelpFile, + XMLFile* pXmlFile, const OString &rGsi1){ + SimpleXMLParser aParser; + //TODO: explicit BOM handling? 
+ + if (!aParser.Execute( sHelpFile, pXmlFile )) + { + printf( + "%s: %s\n", + sHelpFile.getStr(), + aParser.GetError().m_sMessage.getStr()); + exit(-1); + } + pXmlFile->Extract(); + if( !pXmlFile->CheckExportStatus() ){ + return true; + } + + PoOfstream aPoOutput( rPOFile_in, PoOfstream::APP ); + + if (!aPoOutput.isOpen()) { + fprintf(stdout,"Can't open file %s\n",rPOFile_in.getStr()); + return false; + } + + XMLHashMap* aXMLStrHM = pXmlFile->GetStrings(); + + std::vector<OString> order = pXmlFile->getOrder(); + + for (auto const& pos : order) + { + auto posm = aXMLStrHM->find(pos); + /* NOTE(review): posm is dereferenced without being compared against end(); this relies on every key returned by getOrder() being present in the map - confirm. */ + LangHashMap* pElem = posm->second; + + XMLElement* pXMLElement = (*pElem)[ "en-US" ]; + + if( pXMLElement != nullptr ) + { + OString data( + pXMLElement->ToOString(). + replaceAll("\n",OString()). + replaceAll("\t",OString()).trim()); + + common::writePoEntry( + "Helpex", aPoOutput, sHelpFile, rGsi1, + posm->first, OString(), OString(), data); + + pXMLElement=nullptr; + } + else + { + // If this is something totally unexpected, wouldn't an assert() be in order? + // On the other hand, if this is expected, why the printf? + fprintf(stdout,"\nDBG: NullPointer in HelpParser::CreatePO, File %s\n", sHelpFile.getStr()); + } + } + aPoOutput.close(); + + return true; +} + +/* Parses the help file given to the constructor and merges the translations for rLanguage into it, writing the result to rDestinationFile. Returns false only if the file cannot be parsed. */ +bool HelpParser::Merge( const OString &rDestinationFile, + const OString& rLanguage , MergeDataFile* pMergeDataFile ) +{ + SimpleXMLParser aParser; + + //TODO: explicit BOM handling? 
+ + std::unique_ptr<XMLFile> xmlfile(new XMLFile( OString('0') )); + if (!aParser.Execute( sHelpFile, xmlfile.get())) + { + SAL_WARN("l10ntools", "could not parse " << sHelpFile); + return false; + } + MergeSingleFile( xmlfile.get() , pMergeDataFile , rLanguage , rDestinationFile ); + return true; +} + +/* Runs ProcessHelp for every extracted string in export order and then writes the file to sPath. NOTE(review): s_ResData is a function-local static whose sGId and sFilename are overwritten for every string; the find() result is dereferenced without an end() check, as in CreatePO. */ +void HelpParser::MergeSingleFile( XMLFile* file , MergeDataFile* pMergeDataFile , const OString& sLanguage , + OString const & sPath ) +{ + file->Extract(); + + XMLHashMap* aXMLStrHM = file->GetStrings(); + static ResData s_ResData("",""); + s_ResData.sResTyp = "help"; + + std::vector<OString> order = file->getOrder(); + + for (auto const& pos : order) // Merge every l10n related string in the same order as export + { + auto posm = aXMLStrHM->find(pos); + LangHashMap* aLangHM = posm->second; +#if OSL_DEBUG_LEVEL > 2 + printf("*********************DUMPING HASHMAP***************************************"); + Dump(aXMLStrHM); + printf("DBG: sHelpFile = %s\n",sHelpFile.getStr() ); +#endif + + s_ResData.sGId = posm->first; + s_ResData.sFilename = sHelpFile; + + ProcessHelp( aLangHM , sLanguage, &s_ResData , pMergeDataFile ); + } + + file->Write(sPath); +} + +/* ProcessHelp method: search for en-US entry and replace it with the current language*/ +void HelpParser::ProcessHelp( LangHashMap* aLangHM , const OString& sCur , ResData *pResData , MergeDataFile* pMergeDataFile ){ + + XMLElement* pXMLElement = nullptr; + MergeEntrys *pEntrys = nullptr; + + if( sCur.equalsIgnoreAsciiCase("en-US") ) + return; + + pXMLElement = (*aLangHM)[ "en-US" ]; + if( pXMLElement == nullptr ) + { + printf("Error: Can't find en-US entry\n"); + } + if( pXMLElement == nullptr ) + return; + + OString sNewText; + OString sNewdata; + OString sSourceText( + pXMLElement->ToOString(). + replaceAll( + "\n", + OString()). 
+ replaceAll( + "\t", + OString())); + // re-add spaces to the beginning of translated string, + // important for indentation of Basic code examples + sal_Int32 nPreSpaces = 0; + sal_Int32 nLen = sSourceText.getLength(); + while ( (nPreSpaces < nLen) && (sSourceText[nPreSpaces] == ' ') ) + nPreSpaces++; + if( sCur == "qtz" ) + { + sNewText = MergeEntrys::GetQTZText(*pResData, sSourceText); + sNewdata = sNewText; + } + else if( pMergeDataFile ) + { + pEntrys = pMergeDataFile->GetMergeEntrys( pResData ); + if( pEntrys != nullptr) + { + pEntrys->GetText( sNewText, sCur, true ); + if (helper::isWellFormedXML(XMLUtil::QuotHTML(sNewText))) + { + sNewdata = sSourceText.copy(0,nPreSpaces) + sNewText; + } + } + } + if (!sNewdata.isEmpty()) + { + if( pXMLElement != nullptr ) + { + XMLData *data = new XMLData( sNewdata , nullptr ); // Add new one + pXMLElement->RemoveAndDeleteAllChildren(); + pXMLElement->AddChild( data ); + aLangHM->erase( sCur ); + } + } + else + { + SAL_WARN( + "l10ntools", + "Can't find GID=" << pResData->sGId << " TYP=" << pResData->sResTyp); + } + /* Reached whether or not new text was set: the element is relabelled to sCur in both cases. */ + pXMLElement->ChangeLanguageTag(sCur); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/idxdict/idxdict.cxx b/l10ntools/source/idxdict/idxdict.cxx new file mode 100644 index 000000000..0a4a6ddb0 --- /dev/null +++ b/l10ntools/source/idxdict/idxdict.cxx @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <cerrno> +#include <iostream> +#include <fstream> +#include <string> +#include <map> +#include <stdlib.h> +#include <string.h> + +static const int MAXLINE = 1024*64; + +using namespace std; + +int main(int argc, char *argv[]) +{ + if (argc != 3 || strcmp(argv[1],"-o")) + { + cout << "Usage: idxdict -o outputfile < input\n"; + ::exit(99); + } + // This call improves performance by approx 5x + std::ios_base::sync_with_stdio(false); + + const char * outputFile(argv[2]); + char inputBuffer[MAXLINE]; + multimap<string, size_t> entries; + multimap<string,size_t>::iterator ret(entries.begin()); + + cin.getline(inputBuffer, MAXLINE); + const string encoding(inputBuffer); + size_t currentOffset(encoding.size()+1); + while (true) + { + // Extract the next word, but not the entry count + cin.getline(inputBuffer, MAXLINE, '|'); + + if (cin.eof()) break; + + string word(inputBuffer); + ret = entries.insert(ret, pair<string, size_t>(word, currentOffset)); + currentOffset += word.size() + 1; + // Next is the entry count + cin.getline(inputBuffer, MAXLINE); + if (!cin.good()) + { + cerr << "Unable to read entry - insufficient buffer?.\n"; + exit(99); + } + currentOffset += strlen(inputBuffer)+1; + char * endptr; + errno = 0; + int entryCount(strtol(inputBuffer, &endptr, 10)); + if (errno != 0 || endptr == inputBuffer || *endptr != '\0') + { + cerr + << "Unable to read count from \"" << inputBuffer + << "\" input.\n"; + exit(99); + } + for (int i(0); i < entryCount; ++i) + { + cin.getline(inputBuffer, MAXLINE); + currentOffset += strlen(inputBuffer)+1; + } + } + + // Use binary mode to prevent any translation of LF to CRLF on Windows + ofstream outputStream(outputFile, ios_base::binary| ios_base::trunc|ios_base::out); + if (!outputStream.is_open()) + { + cerr << "Unable to open output file " << outputFile << endl; + ::exit(99); + } + + outputStream << encoding << '\n' << entries.size() << '\n'; + + for (auto const& entry : entries) + { + outputStream << entry.first 
<< '|' << entry.second << '\n'; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/lngex.cxx b/l10ntools/source/lngex.cxx new file mode 100644 index 000000000..0d8bc6b93 --- /dev/null +++ b/l10ntools/source/lngex.cxx @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <sal/main.h> + +#include <lngmerge.hxx> + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { + + common::HandledArgs aArgs; + if ( !common::handleArguments(argc, argv, aArgs) ) + { + common::writeUsage("ulfex","*.ulf"); + return 1; + } + + LngParser aParser( aArgs.m_sInputFile ); + if ( aArgs.m_bMergeMode ) + aParser.Merge( + aArgs.m_sMergeSrc, aArgs.m_sOutputFile, aArgs.m_sLanguage ); + else + aParser.CreatePO( aArgs.m_sOutputFile ); + + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/lngmerge.cxx b/l10ntools/source/lngmerge.cxx new file mode 100644 index 000000000..f0cdaa3ff --- /dev/null +++ b/l10ntools/source/lngmerge.cxx @@ -0,0 +1,276 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <cstddef> +#include <fstream> +#include <iterator> +#include <memory> +#include <string> + +#include <po.hxx> +#include <lngmerge.hxx> + +namespace { + +bool lcl_isNextGroup(OString &sGroup_out, const OString &sLineTrim) +{ + if (sLineTrim.startsWith("[") && sLineTrim.endsWith("]")) + { + sGroup_out = sLineTrim.getToken(1, '[').getToken(0, ']').trim(); + return true; + } + return false; +} + +void lcl_RemoveUTF8ByteOrderMarker( OString &rString ) +{ + if( rString.getLength() >= 3 && rString[0] == '\xEF' && + rString[1] == '\xBB' && rString[2] == '\xBF' ) + { + rString = rString.copy(3); + } +} + +} + + + +LngParser::LngParser(const OString &rLngFile) + : sSource( rLngFile ) +{ + std::ifstream aStream(sSource.getStr()); + if (!aStream.is_open()) + return; + + bool bFirstLine = true; + std::string s; + std::getline(aStream, s); + while (!aStream.eof()) + { + OString sLine(s.data(), s.length()); + + if( bFirstLine ) + { + // Always remove UTF8 BOM from the first line + lcl_RemoveUTF8ByteOrderMarker( sLine ); + bFirstLine = false; + } + + mvLines.push_back( sLine ); + std::getline(aStream, s); + } + mvLines.push_back( OString() ); +} + +LngParser::~LngParser() +{ +} + +void LngParser::CreatePO( const OString &rPOFile ) +{ + PoOfstream aPOStream( rPOFile, PoOfstream::APP ); + if (!aPOStream.isOpen()) { + std::cerr << "Ulfex error: Can't open po file:" << rPOFile << "\n"; + } + + size_t nPos = 0; + bool bStart = true; + OString sGroup, sLine; + OStringHashMap Text; + OString sID; + + while( nPos < mvLines.size() ) { + sLine = mvLines[ nPos++ ]; + while( nPos < mvLines.size() && !isNextGroup( sGroup , sLine ) ) { + ReadLine( sLine , Text ); + sID = sGroup; + sLine = mvLines[ nPos++ ]; + } + if( bStart ) { + bStart = false; + sID = sGroup; + } + else { + WritePO( aPOStream , Text , sSource , sID ); + } + Text.erase("x-comment"); + } + aPOStream.close(); +} + +void LngParser::WritePO(PoOfstream &aPOStream, + OStringHashMap 
&rText_inout, const OString &rActFileName, + const OString &rID) +{ + common::writePoEntry( + "Ulfex", aPOStream, rActFileName, "LngText", + rID, OString(), rText_inout.count("x-comment") ? rText_inout["x-comment"] : OString(), rText_inout["en-US"]); +} + +bool LngParser::isNextGroup(OString &sGroup_out, const OString &sLine_in) +{ + return lcl_isNextGroup(sGroup_out, sLine_in.trim()); +} + +void LngParser::ReadLine(const OString &rLine_in, + OStringHashMap &rText_inout) +{ + if (!rLine_in.match(" *") && !rLine_in.match("/*")) + { + OString sLang(rLine_in.getToken(0, '=').trim()); + if (!sLang.isEmpty()) { + OString sText(rLine_in.getToken(1, '"')); + rText_inout[sLang] = sText; + } + } +} + +void LngParser::Merge( + const OString &rPOFile, + const OString &rDestinationFile, + const OString &rLanguage ) +{ + std::ofstream aDestination( + rDestinationFile.getStr(), std::ios_base::out | std::ios_base::trunc); + + MergeDataFile aMergeDataFile( rPOFile, sSource, false, true ); + if( rLanguage.equalsIgnoreAsciiCase("ALL") ) + aLanguages = aMergeDataFile.GetLanguages(); + + size_t nPos = 0; + bool bGroup = false; + OString sGroup; + + // seek to next group + while ( nPos < mvLines.size() && !bGroup ) + bGroup = lcl_isNextGroup(sGroup, mvLines[nPos++].trim()); + + while ( nPos < mvLines.size()) { + OStringHashMap Text; + OString sID( sGroup ); + std::size_t nLastLangPos = 0; + + std::unique_ptr<ResData> pResData( new ResData( sID, sSource ) ); + pResData->sResTyp = "LngText"; + MergeEntrys *pEntrys = aMergeDataFile.GetMergeEntrys( pResData.get() ); + // read languages + bGroup = false; + + OString sLanguagesDone; + + while ( nPos < mvLines.size() && !bGroup ) + { + const OString sLine{ mvLines[nPos].trim() }; + if ( lcl_isNextGroup(sGroup, sLine) ) + { + bGroup = true; + nPos ++; + sLanguagesDone = ""; + } + else + { + sal_Int32 n = 0; + OString sLang(sLine.getToken(0, '=', n)); + if (n == -1 || static_cast<bool>(sLine.match("/*"))) + { + ++nPos; + } + else + { + sLang = 
sLang.trim(); + + OString sSearch{ ";" + sLang + ";" }; + + if ( sLanguagesDone.indexOf( sSearch ) != -1 ) { + mvLines.erase( mvLines.begin() + nPos ); + } + if( pEntrys ) + { + if( !sLang.isEmpty() ) + { + OString sNewText; + pEntrys->GetText( sNewText, sLang, true ); + if( sLang == "qtz" ) + continue; + + if ( !sNewText.isEmpty()) { + mvLines[ nPos ] = sLang + + " = \"" + // escape quotes, unescape double escaped quotes fdo#56648 + + sNewText.replaceAll("\"","\\\"").replaceAll("\\\\\"","\\\"") + + "\""; + Text[ sLang ] = sNewText; + } + } + nLastLangPos = nPos; + nPos ++; + sLanguagesDone += sSearch; + } + else { + nLastLangPos = nPos; + nPos ++; + sLanguagesDone += sSearch; + } + } + } + } + OString sCur; + if ( nLastLangPos ) + { + for(size_t n = 0; n < aLanguages.size(); ++n) + { + sCur = aLanguages[ n ]; + if( !sCur.equalsIgnoreAsciiCase("en-US") && Text[sCur].isEmpty() && pEntrys ) + { + + OString sNewText; + pEntrys->GetText( sNewText, sCur, true ); + if( sCur == "qtz" ) + continue; + if ( !sNewText.isEmpty() && sCur != "x-comment") + { + const OString sLine { sCur + + " = \"" + // escape quotes, unescape double escaped quotes fdo#56648 + + sNewText.replaceAll("\"","\\\"").replaceAll("\\\\\"","\\\"") + + "\"" }; + + nLastLangPos++; + nPos++; + + if ( nLastLangPos < mvLines.size() ) { + mvLines.insert( mvLines.begin() + nLastLangPos, sLine ); + } else { + mvLines.push_back( sLine ); + } + } + } + } + } + } + + for ( size_t i = 0; i < mvLines.size(); ++i ) + aDestination << mvLines[i] << '\n'; + + aDestination.close(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/localize.cxx b/l10ntools/source/localize.cxx new file mode 100644 index 000000000..ee268cf11 --- /dev/null +++ b/l10ntools/source/localize.cxx @@ -0,0 +1,477 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cstddef> +#include <cstdlib> +#include <iostream> +#include <string> +#include <map> +#include <vector> +#include <algorithm> + +#include <osl/file.h> +#include <osl/file.hxx> +#include <osl/thread.h> +#include <rtl/string.h> +#include <rtl/string.hxx> +#include <rtl/textcvt.h> +#include <rtl/strbuf.hxx> +#include <rtl/ustring.h> +#include <rtl/ustring.hxx> +#include <sal/macros.h> +#include <sal/main.h> +#include <sal/types.h> + +#include <po.hxx> + +using namespace std; + +namespace { + +bool matchList( + const OUString& rUrl, const OUStringLiteral* pList, size_t nLength) +{ + for (size_t i = 0; i != nLength; ++i) { + if (rUrl.endsWith(pList[i])) { + return true; + } + } + return false; +} + +bool passesNegativeList(const OUString& rUrl) { + static const OUStringLiteral list[] = { + "/desktop/test/deployment/passive/help/en/help.tree", + "/desktop/test/deployment/passive/help/en/main.xhp", + "/dictionaries.xcu", + "/dictionaries/da_DK/help/da/help.tree", + ("/dictionaries/da_DK/help/da/" + "org.openoffice.da.hunspell.dictionaries/page1.xhp"), + ("/dictionaries/da_DK/help/da/" + "org.openoffice.da.hunspell.dictionaries/page2.xhp"), + "/dictionaries/hu_HU/help/hu/help.tree", 
+ ("/dictionaries/hu_HU/help/hu/" + "org.openoffice.hu.hunspell.dictionaries/page1.xhp"), + "/officecfg/registry/data/org/openoffice/Office/Accelerators.xcu" + }; + return !matchList(rUrl, list, SAL_N_ELEMENTS(list)); +} + +bool passesPositiveList(const OUString& rUrl) { + static const OUStringLiteral list[] = { + "/description.xml" + }; + return matchList(rUrl, list, SAL_N_ELEMENTS(list)); +} + +void handleCommand( + const OString& rInPath, const OString& rOutPath, + const std::string& rExecutable) +{ + OStringBuffer buf; + if (rExecutable == "uiex" || rExecutable == "hrcex") + { + buf.append(OString(getenv("SRC_ROOT"))); + buf.append("/solenv/bin/"); + } + else + { + buf.append(OString(getenv("WORKDIR_FOR_BUILD"))); + buf.append("/LinkTarget/Executable/"); + } + buf.append(rExecutable.data()); + buf.append(" -i "); + buf.append(rInPath); + buf.append(" -o "); + buf.append(rOutPath); + + const OString cmd = buf.makeStringAndClear(); + if (system(cmd.getStr()) != 0) + { + cerr << "Error: Failed to execute " << cmd << '\n'; + throw false; //TODO + } +} + +void InitPoFile( + const OString& rProject, const OString& rInPath, + const OString& rPotDir, const OString& rOutPath ) +{ + //Create directory for po file + { + OUString outDir = + OStringToOUString( + rPotDir.copy(0,rPotDir.lastIndexOf('/')), RTL_TEXTENCODING_UTF8); + OUString outDirUrl; + if (osl::FileBase::getFileURLFromSystemPath(outDir, outDirUrl) + != osl::FileBase::E_None) + { + cerr + << ("Error: Cannot convert pathname to URL in " __FILE__ + ", in line ") + << __LINE__ << "\n outDir: " + << outDir + << "\n"; + throw false; //TODO + } + osl::Directory::createPath(outDirUrl); + } + + //Add header to the po file + PoOfstream aPoOutPut; + aPoOutPut.open(rOutPath.getStr()); + if (!aPoOutPut.isOpen()) + { + cerr + << "Error: Cannot open po file " + << rOutPath << "\n"; + throw false; //TODO + } + + const sal_Int32 nProjectInd = rInPath.indexOf(rProject); + const OString relativPath = + rInPath.copy(nProjectInd, 
rInPath.lastIndexOf('/')- nProjectInd); + + PoHeader aTmp(relativPath); + aPoOutPut.writeHeader(aTmp); + aPoOutPut.close(); +} + +bool fileExists(const OString& fileName) +{ + FILE *f = fopen(fileName.getStr(), "r"); + + if (f != nullptr) + { + fclose(f); + return true; + } + + return false; +} + +OString gDestRoot; + +bool handleFile(const OString& rProject, const OUString& rUrl, const OString& rPotDir) +{ + struct Command { + OUStringLiteral extension; + std::string executable; + bool positive; + }; + static Command const commands[] = { + { OUStringLiteral(".hrc"), "hrcex", false }, + { OUStringLiteral(".ulf"), "ulfex", false }, + { OUStringLiteral(".xcu"), "cfgex", false }, + { OUStringLiteral(".xrm"), "xrmex", false }, + { OUStringLiteral("description.xml"), "xrmex", true }, + { OUStringLiteral(".xhp"), "helpex", false }, + { OUStringLiteral(".properties"), "propex", false }, + { OUStringLiteral(".ui"), "uiex", false }, + { OUStringLiteral(".tree"), "treex", false } }; + for (size_t i = 0; i != SAL_N_ELEMENTS(commands); ++i) + { + if (rUrl.endsWith(commands[i].extension) && + (commands[i].executable != "propex" || rUrl.indexOf("en_US") != -1)) + { + if (commands[i].positive ? 
passesPositiveList(rUrl) : passesNegativeList(rUrl)) + { + //Get input file path + OString sInPath; + { + OUString sInPathTmp; + if (osl::FileBase::getSystemPathFromFileURL(rUrl, sInPathTmp) != + osl::FileBase::E_None) + { + cerr << "osl::FileBase::getSystemPathFromFileURL(" << rUrl << ") failed\n"; + throw false; //TODO + } + sInPath = OUStringToOString( sInPathTmp, RTL_TEXTENCODING_UTF8 ); + } + OString sOutPath; + if (commands[i].executable == "uiex" || commands[i].executable == "hrcex") + sOutPath = gDestRoot + "/" + rProject + "/messages.pot"; + else + sOutPath = rPotDir.concat(".pot"); + + if (!fileExists(sOutPath)) + InitPoFile(rProject, sInPath, rPotDir, sOutPath); + handleCommand(sInPath, sOutPath, commands[i].executable); + + { + //Delete pot file if it contain only the header + PoIfstream aPOStream(sOutPath); + PoEntry aPO; + aPOStream.readEntry( aPO ); + bool bDel = aPOStream.eof(); + aPOStream.close(); + if (bDel) + { + if ( system(OString("rm " + sOutPath).getStr()) != 0 ) + { + cerr + << "Error: Cannot remove entryless pot file: " + << sOutPath << "\n"; + throw false; //TODO + } + } + } + + return true; + } + break; + } + } + return false; +} + +void handleFilesOfDir( + std::vector<OUString>& aFiles, const OString& rProject, + const OString& rPotDir ) +{ + ///Handle files in lexical order + std::sort(aFiles.begin(), aFiles.end()); + + for (auto const& elem : aFiles) + handleFile(rProject, elem, rPotDir); +} + +bool includeProject(const OString& rProject) { + static const char *projects[] = { + "include", + "accessibility", + "avmedia", + "basctl", + "basic", + "chart2", + "connectivity", + "cui", + "dbaccess", + "desktop", + "dictionaries", + "editeng", + "extensions", + "extras", + "filter", + "forms", + "formula", + "fpicker", + "framework", + "helpcontent2", + "instsetoo_native", + "librelogo", + "mysqlc", + "nlpsolver", + "officecfg", + "oox", + "readlicense_oo", + "reportbuilder", + "reportdesign", + "sc", + "scaddins", + "sccomp", + "scp2", + 
"sd", + "sdext", + "setup_native", + "sfx2", + "shell", + "starmath", + "svl", + "svtools", + "svx", + "sw", + "swext", + "sysui", + "uui", + "vcl", + "wizards", + "writerperfect", + "xmlsecurity" }; + for (size_t i = 0; i != SAL_N_ELEMENTS(projects); ++i) { + if (rProject == projects[i]) { + return true; + } + } + return false; +} + +/// Handle one directory in the hierarchy. +/// +/// Ignores symlinks and instead explicitly descends into clone/* or src/*, +/// as the Cygwin symlinks are not supported by osl::Directory on Windows. +/// +/// @param rUrl the absolute file URL of this directory +/// +/// @param nLevel 0 if this is the root directory (core repository) +/// that contains the individual modules. 1 if it is a toplevel module and +/// larger values for the subdirectories. +/// +/// @param rProject the name of the project (empty and ignored if nLevel <= 0) +/// @param rPotDir the path of pot directory +void handleDirectory( + const OUString& rUrl, int nLevel, + const OString& rProject, const OString& rPotDir) +{ + osl::Directory dir(rUrl); + if (dir.open() != osl::FileBase::E_None) { + cerr + << "Error: Cannot open directory: " << rUrl << '\n'; + throw false; //TODO + } + std::vector<OUString> aFileNames; + std::map<OUString, std::map<OString, OString>> aSubDirs; + for (;;) { + osl::DirectoryItem item; + osl::FileBase::RC e = dir.getNextItem(item); + if (e == osl::FileBase::E_NOENT) { + break; + } + if (e != osl::FileBase::E_None) { + cerr << "Error: Cannot read directory\n"; + throw false; //TODO + } + osl::FileStatus stat( + osl_FileStatus_Mask_Type | osl_FileStatus_Mask_FileName + | osl_FileStatus_Mask_FileURL); + if (item.getFileStatus(stat) != osl::FileBase::E_None) { + cerr << "Error: Cannot get file status\n"; + throw false; //TODO + } + const OString sDirName = + OUStringToOString(stat.getFileName(),RTL_TEXTENCODING_UTF8); + switch (nLevel) + { + case 0: // a root directory + if (stat.getFileType() == osl::FileStatus::Directory && 
includeProject(sDirName)) + aSubDirs[stat.getFileURL()][sDirName] = rPotDir.concat("/").concat(sDirName); + break; + default: + if (stat.getFileType() == osl::FileStatus::Directory) + aSubDirs[stat.getFileURL()][rProject] = rPotDir.concat("/").concat(sDirName); + else + aFileNames.push_back(stat.getFileURL()); + break; + } + } + + OString aPotDir(rPotDir); + if( !aFileNames.empty() ) + { + OString aProject(rProject); + if (aProject == "include" && nLevel > 1) + { + aProject = aPotDir.copy(aPotDir.lastIndexOf('/') + 1); + aPotDir = aPotDir.copy(0, aPotDir.lastIndexOf("include")) + aProject + "/messages"; + } + if (aProject != "include") + { + handleFilesOfDir(aFileNames, aProject, aPotDir); + } + } + + if (dir.close() != osl::FileBase::E_None) { + cerr << "Error: Cannot close directory\n"; + throw false; //TODO + } + + for (auto const& elem : aSubDirs) + handleDirectory(elem.first, nLevel + 1, elem.second.begin()->first, + elem.second.begin()->second); + + //Remove empty pot directory + OUString sPoPath = + OStringToOUString( + aPotDir.copy(0,aPotDir.lastIndexOf('/')), RTL_TEXTENCODING_UTF8); + OUString sPoUrl; + if (osl::FileBase::getFileURLFromSystemPath(sPoPath, sPoUrl) + != osl::FileBase::E_None) + { + cerr + << ("Error: Cannot convert pathname to URL in " __FILE__ + ", in line ") + << __LINE__ << "\n" + << sPoPath + << "\n"; + throw false; //TODO + } + osl::Directory::remove(sPoUrl); +} + +void handleProjects(char const * sSourceRoot, char const * sDestRoot) +{ + OUString root16; + if (!rtl_convertStringToUString( + &root16.pData, sSourceRoot, rtl_str_getLength(sSourceRoot), + osl_getThreadTextEncoding(), + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR))) + { + cerr << "Error: Cannot convert pathname to UTF-16\n"; + throw false; //TODO + } + OUString rootUrl; + if (osl::FileBase::getFileURLFromSystemPath(root16, rootUrl) + != osl::FileBase::E_None) + { + cerr + << ("Error: Cannot 
convert pathname to URL in " __FILE__ + ", in line ") + << __LINE__ << "\n root16: " + << root16 + << "\n"; + throw false; //TODO + } + gDestRoot = OString(sDestRoot); + handleDirectory(rootUrl, 0, OString(), gDestRoot); +} +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + try + { + if (argc != 3) + { + cerr + << ("localize (c)2001 by Sun Microsystems\n\n" + "As part of the L10N framework, localize extracts en-US\n" + "strings for translation out of the toplevel modules defined\n" + "in projects array in l10ntools/source/localize.cxx.\n\n" + "Syntax: localize <source-root> <outfile>\n"); + exit(EXIT_FAILURE); + } + handleProjects(argv[1],argv[2]); + } + catch (std::exception& e) + { + cerr << "exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } + catch (bool) //TODO + { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/merge.cxx b/l10ntools/source/merge.cxx new file mode 100644 index 000000000..75afdf77b --- /dev/null +++ b/l10ntools/source/merge.cxx @@ -0,0 +1,331 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> +#include <sal/log.hxx> + +#include <algorithm> +#include <fstream> +#include <string> +#include <vector> + +#include <export.hxx> +#include <po.hxx> + +namespace +{ + OString lcl_NormalizeFilename(const OString& rFilename) + { + return rFilename.copy( + std::max( + rFilename.lastIndexOf( '\\' ), + rFilename.lastIndexOf( '/' ))+1); + }; + + bool lcl_ReadPoChecked( + PoEntry& o_rPoEntry, PoIfstream& rPoFile, + const OString& rFileName) + { + try + { + rPoFile.readEntry( o_rPoEntry ); + } + catch (const PoIfstream::Exception&) + { + SAL_WARN("l10ntools", rFileName << " contains invalid entry"); + return false; + } + return true; + } +} + + + + +ResData::ResData( const OString &rGId ) + : + sGId( rGId ) +{ + sGId = sGId.replaceAll("\r", OString()); +} + +ResData::ResData( const OString &rGId, const OString &rFilename) + : + sGId( rGId ), + sFilename( rFilename ) +{ + sGId = sGId.replaceAll("\r", OString()); +} + + + + +bool MergeEntrys::GetText( OString &rReturn, + const OString &nLangIndex, bool bDel ) +{ + bool bReturn = true; + rReturn = sText[ nLangIndex ]; + if ( bDel ) + sText[ nLangIndex ] = ""; + bReturn = bTextFirst[ nLangIndex ]; + bTextFirst[ nLangIndex ] = false; + return bReturn; +} + +namespace +{ + OString GetDoubleBars() + { + //DOUBLE VERTICAL LINE instead of || because the translations make their + //way into action_names under gtk3 where || is illegal + return OUStringToOString(OUString(u'\x2016'), RTL_TEXTENCODING_UTF8); + } +} + +OString MergeEntrys::GetQTZText(const ResData& rResData, const OString& rOrigText) +{ + const OString sFilename = rResData.sFilename.copy(rResData.sFilename.lastIndexOf('/')+1); + const OString sKey = + PoEntry::genKeyId(sFilename + rResData.sGId + rResData.sId + rResData.sResTyp + rOrigText); + return sKey + GetDoubleBars() + rOrigText; +} + + + +MergeDataFile::MergeDataFile( + const OString &rFileName, const OString &rFile, + bool bCaseSensitive, bool bWithQtz ) +{ + OString 
sEnableReleaseBuild(getenv("ENABLE_RELEASE_BUILD")); + + std::ifstream aInputStream( rFileName.getStr() ); + if ( !aInputStream.is_open() ) + { + SAL_WARN("l10ntools", "Can't open po path container file for " << rFileName); + return; + } + std::string sPoFile; + aInputStream >> sPoFile; + bool bFirstLang = true; + while( !aInputStream.eof() ) + { + bool bSkipCurrentPOFile = false; + const OString sFileName( lcl_NormalizeFilename(rFile) ); + const bool bReadAll = sFileName.isEmpty(); + // coverity[tainted_data] - this is a build time tool + const OString sPoFileName(sPoFile.data(), static_cast<sal_Int32>(sPoFile.length())); + PoIfstream aPoInput; + aPoInput.open( sPoFileName ); + if ( !aPoInput.isOpen() ) + { + SAL_WARN("l10ntools", "Can't open file: " << sPoFileName); + return; + } + + OString sLang; + //Get language id from path + { + const OString sTransSource("translations/source/"); + const sal_Int32 nStart = + sPoFileName.indexOf(sTransSource)+sTransSource.getLength(); + const sal_Int32 nCount = + sPoFileName.indexOf('/',nStart) - nStart; + sLang = sPoFileName.copy(nStart,nCount); + } + aLanguageSet.insert( sLang ); + PoEntry aNextPo; + do + { + if( !lcl_ReadPoChecked(aNextPo, aPoInput, sPoFileName) ) + { + bSkipCurrentPOFile = true; + break; + } + } while( !aPoInput.eof() && aNextPo.getSourceFile() != sFileName && !bReadAll ); + while( !aPoInput.eof() && (aNextPo.getSourceFile() == sFileName || bReadAll ) && !bSkipCurrentPOFile ) + { + PoEntry aActPo( aNextPo ); + + bool bInSameComp = false; + OString sText; + OString sQHText; + OString sTitle; + OString sExText; + OString sExQHText; + OString sExTitle; + do + { + if( bInSameComp ) + aActPo = aNextPo; + OString sTemp = aActPo.getMsgStr(); + if( aActPo.isFuzzy() || sTemp.isEmpty() ) + sTemp = aActPo.getMsgId(); + switch( aActPo.getType() ) + { + case PoEntry::TTEXT: + sText = sTemp; + sExText = aActPo.getMsgId(); + break; + case PoEntry::TQUICKHELPTEXT: + sQHText = sTemp; + sExQHText = aActPo.getMsgId(); + 
break; + case PoEntry::TTITLE: + sTitle = sTemp; + sExTitle = aActPo.getMsgId(); + break; + } + if( !lcl_ReadPoChecked(aNextPo, aPoInput, sPoFileName) ) + { + bSkipCurrentPOFile = true; + break; + } + if (aPoInput.eof()) + break; + bInSameComp = PoEntry::IsInSameComp(aActPo, aNextPo); + } while( bInSameComp ); + + InsertEntry( + aActPo.getResourceType(), aActPo.getGroupId(), + aActPo.getLocalId(), sLang, sText, + sQHText, sTitle, aActPo.getSourceFile(), + bFirstLang, bCaseSensitive ); + + if( bFirstLang && bWithQtz && + sEnableReleaseBuild != "TRUE" ) + { + aLanguageSet.insert("qtz"); + InsertEntry( + aActPo.getResourceType(), aActPo.getGroupId(), + aActPo.getLocalId(), "qtz", + sExText, sExQHText, + sExTitle, aActPo.getSourceFile(), + false, bCaseSensitive ); + } + } + aPoInput.close(); + aInputStream >> sPoFile; + bFirstLang = false; + } + aInputStream.close(); +} + +MergeDataFile::~MergeDataFile() +{ +} + +std::vector<OString> MergeDataFile::GetLanguages() const +{ + return std::vector<OString>(aLanguageSet.begin(),aLanguageSet.end()); +} + +MergeEntrys *MergeDataFile::GetMergeData( ResData *pResData , bool bCaseSensitive ) +{ + OString sOldG = pResData->sGId; + OString sOldL = pResData->sId; + OString sGID = pResData->sGId; + OString sLID; + if (sGID.isEmpty()) + sGID = pResData->sId; + else + sLID = pResData->sId; + pResData->sGId = sGID; + pResData->sId = sLID; + + OString sKey = CreateKey( pResData->sResTyp , pResData->sGId , pResData->sId , pResData->sFilename , bCaseSensitive ); + + auto mit = aMap.find( sKey ); + if(mit != aMap.end()) + { + pResData->sGId = sOldG; + pResData->sId = sOldL; + return mit->second.get(); + } + pResData->sGId = sOldG; + pResData->sId = sOldL; + return nullptr; +} + +MergeEntrys *MergeDataFile::GetMergeEntrys( ResData *pResData ) +{ + // search for requested MergeEntrys + return GetMergeData( pResData ); +} + +MergeEntrys *MergeDataFile::GetMergeEntrysCaseSensitive( ResData *pResData ) +{ + // search for requested MergeEntrys + 
return GetMergeData( pResData , true ); +} + +void MergeDataFile::InsertEntry( + const OString &rTYP, const OString &rGID, + const OString &rLID, const OString &nLANG, + const OString &rTEXT, const OString &rQHTEXT, + const OString &rTITLE, const OString &rInFilename, + bool bFirstLang, bool bCaseSensitive ) +{ + MergeEntrys *pMergeEntrys = nullptr; + + // search for MergeData + OString sKey = CreateKey(rTYP , rGID , rLID , rInFilename , bCaseSensitive); + + if( !bFirstLang ) + { + auto mit = aMap.find( sKey ); + if(mit != aMap.end()) + pMergeEntrys = mit->second.get(); + + } + + if( !pMergeEntrys ) + { + pMergeEntrys = new MergeEntrys; + aMap.emplace( sKey, std::unique_ptr<MergeEntrys>(pMergeEntrys) ); + } + + + // insert the cur string + if( nLANG =="qtz" ) + { + const OString sTemp = rInFilename + rGID + rLID + rTYP; + pMergeEntrys->InsertEntry( + nLANG, + rTEXT.isEmpty()? rTEXT : PoEntry::genKeyId(sTemp + rTEXT) + GetDoubleBars() + rTEXT, + rQHTEXT.isEmpty()? rQHTEXT : PoEntry::genKeyId(sTemp + rQHTEXT) + GetDoubleBars() + rQHTEXT, + rTITLE.isEmpty()? rTITLE : PoEntry::genKeyId(sTemp + rTITLE) + GetDoubleBars() + rTITLE ); + } + else + { + pMergeEntrys->InsertEntry( nLANG , rTEXT, rQHTEXT, rTITLE ); + } +} + +OString MergeDataFile::CreateKey(const OString& rTYP, const OString& rGID, + const OString& rLID, const OString& rFilename, bool bCaseSensitive) +{ + static const char sStroke[] = "-"; + OString sKey = rTYP + sStroke + rGID + sStroke + rLID + sStroke + + lcl_NormalizeFilename(rFilename); + if(bCaseSensitive) + return sKey; // officecfg case sensitive identifier + return sKey.toAsciiUpperCase(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/po.cxx b/l10ntools/source/po.cxx new file mode 100644 index 000000000..4df1e1782 --- /dev/null +++ b/l10ntools/source/po.cxx @@ -0,0 +1,647 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <rtl/ustring.hxx> +#include <rtl/crc.h> +#include <sal/log.hxx> + +#include <cstring> +#include <ctime> +#include <cassert> + +#include <vector> +#include <string> + +#include <po.hxx> +#include <helper.hxx> + +/** Container of po entry + + Provide all file operations related to LibreOffice specific + po entry and store it's attributes. +*/ +class GenPoEntry +{ +private: + OStringBuffer m_sExtractCom; + std::vector<OString> m_sReferences; + OString m_sMsgCtxt; + OString m_sMsgId; + OString m_sMsgIdPlural; + OString m_sMsgStr; + std::vector<OString> m_sMsgStrPlural; + bool m_bFuzzy; + bool m_bCFormat; + bool m_bNull; + +public: + GenPoEntry(); + + const std::vector<OString>& getReference() const { return m_sReferences; } + const OString& getMsgCtxt() const { return m_sMsgCtxt; } + const OString& getMsgId() const { return m_sMsgId; } + const OString& getMsgStr() const { return m_sMsgStr; } + bool isFuzzy() const { return m_bFuzzy; } + bool isNull() const { return m_bNull; } + + void setExtractCom(const OString& rExtractCom) + { + m_sExtractCom = rExtractCom; + } + void setReference(const OString& rReference) + { + m_sReferences.push_back(rReference); + } + void setMsgCtxt(const OString& rMsgCtxt) + { + m_sMsgCtxt = rMsgCtxt; + } + void setMsgId(const OString& rMsgId) + { + m_sMsgId = rMsgId; + } + void setMsgStr(const OString& rMsgStr) + { + m_sMsgStr = rMsgStr; + } + + void writeToFile(std::ofstream& rOFStream) const; + void readFromFile(std::ifstream& rIFStream); +}; + +namespace +{ + // Convert a normal string to msg/po output string + OString lcl_GenMsgString(const OString& rString) + { + if ( rString.isEmpty() ) + return "\"\""; + + OString sResult = + "\"" + + helper::escapeAll(rString,"\n""\t""\r""\\""\"","\\n""\\t""\\r""\\\\""\\\"") + + 
"\""; + sal_Int32 nIndex = 0; + while((nIndex=sResult.indexOf("\\n",nIndex))!=-1) + { + if( !sResult.match("\\\\n", nIndex-1) && + nIndex!=sResult.getLength()-3) + { + sResult = sResult.replaceAt(nIndex,2,"\\n\"\n\""); + } + ++nIndex; + } + + if ( sResult.indexOf('\n') != -1 ) + return "\"\"\n" + sResult; + + return sResult; + } + + // Convert msg string to normal form + OString lcl_GenNormString(const OString& rString) + { + return + helper::unEscapeAll( + rString.copy(1,rString.getLength()-2), + "\\n""\\t""\\r""\\\\""\\\"", + "\n""\t""\r""\\""\""); + } +} + +GenPoEntry::GenPoEntry() + : m_sExtractCom( OString() ) + , m_sReferences( std::vector<OString>() ) + , m_sMsgCtxt( OString() ) + , m_sMsgId( OString() ) + , m_sMsgIdPlural( OString() ) + , m_sMsgStr( OString() ) + , m_sMsgStrPlural( std::vector<OString>() ) + , m_bFuzzy( false ) + , m_bCFormat( false ) + , m_bNull( false ) +{ +} + +void GenPoEntry::writeToFile(std::ofstream& rOFStream) const +{ + if ( rOFStream.tellp() != std::ofstream::pos_type( 0 )) + rOFStream << std::endl; + if ( !m_sExtractCom.isEmpty() ) + rOFStream + << "#. " + << m_sExtractCom.toString().replaceAll("\n","\n#. 
") << std::endl; + for(const auto& rReference : m_sReferences) + rOFStream << "#: " << rReference << std::endl; + if ( m_bFuzzy ) + rOFStream << "#, fuzzy" << std::endl; + if ( m_bCFormat ) + rOFStream << "#, c-format" << std::endl; + if ( !m_sMsgCtxt.isEmpty() ) + rOFStream << "msgctxt " + << lcl_GenMsgString(m_sMsgCtxt) + << std::endl; + rOFStream << "msgid " + << lcl_GenMsgString(m_sMsgId) << std::endl; + if ( !m_sMsgIdPlural.isEmpty() ) + rOFStream << "msgid_plural " + << lcl_GenMsgString(m_sMsgIdPlural) + << std::endl; + if ( !m_sMsgStrPlural.empty() ) + for(auto & line : m_sMsgStrPlural) + rOFStream << line.copy(0,10) << lcl_GenMsgString(line.copy(10)) << std::endl; + else + rOFStream << "msgstr " + << lcl_GenMsgString(m_sMsgStr) << std::endl; +} + +void GenPoEntry::readFromFile(std::ifstream& rIFStream) +{ + *this = GenPoEntry(); + OString* pLastMsg = nullptr; + std::string sTemp; + getline(rIFStream,sTemp); + if( rIFStream.eof() || sTemp.empty() ) + { + m_bNull = true; + return; + } + while(!rIFStream.eof()) + { + OString sLine(sTemp.data(),sTemp.length()); + if (sLine.startsWith("#. 
")) + { + if( !m_sExtractCom.isEmpty() ) + { + m_sExtractCom.append("\n"); + } + m_sExtractCom.append(sLine.copy(3)); + } + else if (sLine.startsWith("#: ")) + { + m_sReferences.push_back(sLine.copy(3)); + } + else if (sLine.startsWith("#, fuzzy")) + { + m_bFuzzy = true; + } + else if (sLine.startsWith("#, c-format")) + { + m_bCFormat = true; + } + else if (sLine.startsWith("msgctxt ")) + { + m_sMsgCtxt = lcl_GenNormString(sLine.copy(8)); + pLastMsg = &m_sMsgCtxt; + } + else if (sLine.startsWith("msgid ")) + { + m_sMsgId = lcl_GenNormString(sLine.copy(6)); + pLastMsg = &m_sMsgId; + } + else if (sLine.startsWith("msgid_plural ")) + { + m_sMsgIdPlural = lcl_GenNormString(sLine.copy(13)); + pLastMsg = &m_sMsgIdPlural; + } + else if (sLine.startsWith("msgstr ")) + { + m_sMsgStr = lcl_GenNormString(sLine.copy(7)); + pLastMsg = &m_sMsgStr; + } + else if (sLine.startsWith("msgstr[")) + { + // assume there are no more than 10 plural forms... + // and that plural strings are never split to multi-line in po + m_sMsgStrPlural.push_back(sLine.copy(0,10) + lcl_GenNormString(sLine.copy(10))); + } + else if (sLine.startsWith("\"") && pLastMsg) + { + OString sReference; + if (!m_sReferences.empty()) + { + sReference = m_sReferences.front(); + } + if (pLastMsg != &m_sMsgCtxt || sLine != "\"" + sReference + "\\n\"") + { + *pLastMsg += lcl_GenNormString(sLine); + } + } + else + break; + getline(rIFStream,sTemp); + } + } + +PoEntry::PoEntry() + : m_bIsInitialized( false ) +{ +} + +PoEntry::PoEntry( + const OString& rSourceFile, const OString& rResType, const OString& rGroupId, + const OString& rLocalId, const OString& rHelpText, + const OString& rText, const TYPE eType ) + : m_bIsInitialized( false ) +{ + if( rSourceFile.isEmpty() ) + throw NOSOURCFILE; + else if ( rResType.isEmpty() ) + throw NORESTYPE; + else if ( rGroupId.isEmpty() ) + throw NOGROUPID; + else if ( rText.isEmpty() ) + throw NOSTRING; + else if ( rHelpText.getLength() == 5 ) + throw WRONGHELPTEXT; + + m_pGenPo.reset( 
new GenPoEntry() ); + OString sReference = rSourceFile.copy(rSourceFile.lastIndexOf('/')+1); + m_pGenPo->setReference(sReference); + + OString sMsgCtxt = + sReference + "\n" + + rGroupId + "\n" + + (rLocalId.isEmpty() ? OString() : rLocalId + "\n") + + rResType; + switch(eType){ + case TTEXT: + sMsgCtxt += ".text"; break; + case TQUICKHELPTEXT: + sMsgCtxt += ".quickhelptext"; break; + case TTITLE: + sMsgCtxt += ".title"; break; + // Default case is unneeded because the type of eType has only three element + } + m_pGenPo->setMsgCtxt(sMsgCtxt); + m_pGenPo->setMsgId(rText); + m_pGenPo->setExtractCom( + ( !rHelpText.isEmpty() ? rHelpText + "\n" : OString()) + + genKeyId( m_pGenPo->getReference().front() + rGroupId + rLocalId + rResType + rText ) ); + m_bIsInitialized = true; +} + +PoEntry::~PoEntry() +{ +} + +PoEntry::PoEntry( const PoEntry& rPo ) + : m_pGenPo( rPo.m_pGenPo ? new GenPoEntry( *(rPo.m_pGenPo) ) : nullptr ) + , m_bIsInitialized( rPo.m_bIsInitialized ) +{ +} + +PoEntry& PoEntry::operator=(const PoEntry& rPo) +{ + if( this == &rPo ) + { + return *this; + } + if( rPo.m_pGenPo ) + { + if( m_pGenPo ) + { + *m_pGenPo = *(rPo.m_pGenPo); + } + else + { + m_pGenPo.reset( new GenPoEntry( *(rPo.m_pGenPo) ) ); + } + } + else + { + m_pGenPo.reset(); + } + m_bIsInitialized = rPo.m_bIsInitialized; + return *this; +} + +PoEntry& PoEntry::operator=(PoEntry&& rPo) noexcept +{ + m_pGenPo = std::move(rPo.m_pGenPo); + m_bIsInitialized = std::move(rPo.m_bIsInitialized); + return *this; +} + +OString const & PoEntry::getSourceFile() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getReference().front(); +} + +OString PoEntry::getGroupId() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgCtxt().getToken(0,'\n'); +} + +OString PoEntry::getLocalId() const +{ + assert( m_bIsInitialized ); + const OString sMsgCtxt = m_pGenPo->getMsgCtxt(); + if (sMsgCtxt.indexOf('\n')==sMsgCtxt.lastIndexOf('\n')) + return OString(); + else + return sMsgCtxt.getToken(1,'\n'); 
+} + +OString PoEntry::getResourceType() const +{ + assert( m_bIsInitialized ); + const OString sMsgCtxt = m_pGenPo->getMsgCtxt(); + if (sMsgCtxt.indexOf('\n')==sMsgCtxt.lastIndexOf('\n')) + return sMsgCtxt.getToken(1,'\n').getToken(0,'.'); + else + return sMsgCtxt.getToken(2,'\n').getToken(0,'.'); +} + +PoEntry::TYPE PoEntry::getType() const +{ + assert( m_bIsInitialized ); + const OString sMsgCtxt = m_pGenPo->getMsgCtxt(); + const OString sType = sMsgCtxt.copy( sMsgCtxt.lastIndexOf('.') + 1 ); + assert( + (sType == "text" || sType == "quickhelptext" || sType == "title") ); + if ( sType == "text" ) + return TTEXT; + else if ( sType == "quickhelptext" ) + return TQUICKHELPTEXT; + else + return TTITLE; +} + +bool PoEntry::isFuzzy() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->isFuzzy(); +} + +// Get message context +const OString& PoEntry::getMsgCtxt() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgCtxt(); + +} + +// Get translation string in merge format +OString const & PoEntry::getMsgId() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgId(); +} + +// Get translated string in merge format +const OString& PoEntry::getMsgStr() const +{ + assert( m_bIsInitialized ); + return m_pGenPo->getMsgStr(); + +} + +bool PoEntry::IsInSameComp(const PoEntry& rPo1,const PoEntry& rPo2) +{ + assert( rPo1.m_bIsInitialized && rPo2.m_bIsInitialized ); + return ( rPo1.getSourceFile() == rPo2.getSourceFile() && + rPo1.getGroupId() == rPo2.getGroupId() && + rPo1.getLocalId() == rPo2.getLocalId() && + rPo1.getResourceType() == rPo2.getResourceType() ); +} + +OString PoEntry::genKeyId(const OString& rGenerator) +{ + sal_uInt32 nCRC = rtl_crc32(0, rGenerator.getStr(), rGenerator.getLength()); + // Use simple ASCII characters, exclude I, l, 1 and O, 0 to avoid confusing IDs + static const char sSymbols[] = + "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz23456789"; + char sKeyId[6]; + for( short nKeyInd = 0; nKeyInd < 5; ++nKeyInd ) + { + 
sKeyId[nKeyInd] = sSymbols[(nCRC & 63) % strlen(sSymbols)]; + nCRC >>= 6; + } + sKeyId[5] = '\0'; + return sKeyId; +} + +namespace +{ + // Get actual time in "YEAR-MO-DA HO:MI+ZONE" form + OString lcl_GetTime() + { + time_t aNow = time(nullptr); + struct tm* pNow = localtime(&aNow); + char pBuff[50]; + strftime( pBuff, sizeof pBuff, "%Y-%m-%d %H:%M%z", pNow ); + return pBuff; + } +} + +// when updating existing files (pocheck), reuse provided po-header +PoHeader::PoHeader( const OString& rExtSrc, const OString& rPoHeaderMsgStr ) + : m_pGenPo( new GenPoEntry() ) + , m_bIsInitialized( false ) +{ + m_pGenPo->setExtractCom("extracted from " + rExtSrc); + m_pGenPo->setMsgStr(rPoHeaderMsgStr); + m_bIsInitialized = true; +} + +PoHeader::PoHeader( const OString& rExtSrc ) + : m_pGenPo( new GenPoEntry() ) + , m_bIsInitialized( false ) +{ + m_pGenPo->setExtractCom("extracted from " + rExtSrc); + m_pGenPo->setMsgStr( + "Project-Id-Version: PACKAGE VERSION\n" + "Report-Msgid-Bugs-To: https://bugs.libreoffice.org/enter_bug.cgi?" 
+ "product=LibreOffice&bug_status=UNCONFIRMED&component=UI\n" + "POT-Creation-Date: " + lcl_GetTime() + + "\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" + "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" + "Language-Team: LANGUAGE <LL@li.org>\n" + "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=UTF-8\n" + "Content-Transfer-Encoding: 8bit\n" + "X-Accelerator-Marker: ~\n" + "X-Generator: LibreOffice\n"); + m_bIsInitialized = true; +} + +PoHeader::~PoHeader() +{ +} + +PoOfstream::PoOfstream() + : m_aOutPut() + , m_bIsAfterHeader( false ) +{ +} + +PoOfstream::PoOfstream(const OString& rFileName, OpenMode aMode ) + : m_aOutPut() + , m_bIsAfterHeader( false ) +{ + open( rFileName, aMode ); +} + +PoOfstream::~PoOfstream() +{ + if( isOpen() ) + { + close(); + } +} + +void PoOfstream::open(const OString& rFileName, OpenMode aMode ) +{ + assert( !isOpen() ); + if( aMode == TRUNC ) + { + m_aOutPut.open( rFileName.getStr(), + std::ios_base::out | std::ios_base::trunc ); + m_bIsAfterHeader = false; + } + else if( aMode == APP ) + { + m_aOutPut.open( rFileName.getStr(), + std::ios_base::out | std::ios_base::app ); + m_bIsAfterHeader = m_aOutPut.tellp() != std::ofstream::pos_type( 0 ); + } +} + +void PoOfstream::close() +{ + assert( isOpen() ); + m_aOutPut.close(); +} + +void PoOfstream::writeHeader(const PoHeader& rPoHeader) +{ + assert( isOpen() && !m_bIsAfterHeader && rPoHeader.m_bIsInitialized ); + rPoHeader.m_pGenPo->writeToFile( m_aOutPut ); + m_bIsAfterHeader = true; +} + +void PoOfstream::writeEntry( const PoEntry& rPoEntry ) +{ + assert( isOpen() && m_bIsAfterHeader && rPoEntry.m_bIsInitialized ); + rPoEntry.m_pGenPo->writeToFile( m_aOutPut ); +} + +namespace +{ + +// Check the validity of read entry +bool lcl_CheckInputEntry(const GenPoEntry& rEntry) +{ + return !rEntry.getReference().empty() && + !rEntry.getMsgCtxt().isEmpty() && + !rEntry.getMsgId().isEmpty(); +} + +} + +PoIfstream::PoIfstream() + : m_aInPut() + , m_bEof( false ) +{ +} + 
+PoIfstream::PoIfstream(const OString& rFileName) + : m_aInPut() + , m_bEof( false ) +{ + open( rFileName ); +} + +PoIfstream::~PoIfstream() +{ + if( isOpen() ) + { + close(); + } +} + +void PoIfstream::open( const OString& rFileName, OString& rPoHeader ) +{ + assert( !isOpen() ); + m_aInPut.open( rFileName.getStr(), std::ios_base::in ); + + // capture header, updating timestamp and generator + std::string sTemp; + std::getline(m_aInPut,sTemp); + while( !sTemp.empty() && !m_aInPut.eof() ) + { + std::getline(m_aInPut,sTemp); + OString sLine(sTemp.data(),sTemp.length()); + if (sLine.startsWith("\"PO-Revision-Date")) + rPoHeader += "PO-Revision-Date: " + lcl_GetTime() + "\n"; + else if (sLine.startsWith("\"X-Generator")) + rPoHeader += "X-Generator: LibreOffice\n"; + else if (sLine.startsWith("\"")) + rPoHeader += lcl_GenNormString(sLine); + } + m_bEof = false; +} + +void PoIfstream::open( const OString& rFileName ) +{ + assert( !isOpen() ); + m_aInPut.open( rFileName.getStr(), std::ios_base::in ); + + // Skip header + std::string sTemp; + std::getline(m_aInPut,sTemp); + while( !sTemp.empty() && !m_aInPut.eof() ) + { + std::getline(m_aInPut,sTemp); + } + m_bEof = false; +} + +void PoIfstream::close() +{ + assert( isOpen() ); + m_aInPut.close(); +} + +void PoIfstream::readEntry( PoEntry& rPoEntry ) +{ + assert( isOpen() && !eof() ); + GenPoEntry aGenPo; + aGenPo.readFromFile( m_aInPut ); + if( aGenPo.isNull() ) + { + m_bEof = true; + rPoEntry = PoEntry(); + } + else + { + if( lcl_CheckInputEntry(aGenPo) ) + { + if( rPoEntry.m_pGenPo ) + { + *(rPoEntry.m_pGenPo) = aGenPo; + } + else + { + rPoEntry.m_pGenPo.reset( new GenPoEntry( aGenPo ) ); + } + rPoEntry.m_bIsInitialized = true; + } + else + { + SAL_WARN("l10ntools", "Parse problem with entry: " << aGenPo.getMsgStr()); + throw PoIfstream::Exception(); + } + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/pocheck.cxx b/l10ntools/source/pocheck.cxx new file mode 100644 index 
000000000..6f3252451 --- /dev/null +++ b/l10ntools/source/pocheck.cxx @@ -0,0 +1,425 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <iostream> +#include <map> +#include <vector> +#include <rtl/string.hxx> +#include <rtl/ustring.hxx> +#include <osl/file.hxx> +#include <po.hxx> + +// Translated style names must be unique +static void checkStyleNames(const OString& aLanguage) +{ + std::map<OString,sal_uInt16> aLocalizedStyleNames; + std::map<OString,sal_uInt16> aLocalizedNumStyleNames; + std::vector<PoEntry> repeatedEntries; + + OString aPoPath = getenv("SRC_ROOT") + + OStringLiteral("/translations/source/") + + aLanguage + "/sw/messages.po"; + PoIfstream aPoInput; + aPoInput.open(aPoPath); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + bool bRepeated = false; + if( aPoInput.eof() ) + { + break; + } + + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt().startsWith("STR_POOLCOLL") ) + { + const OString& aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedStyleNames.find(aMsgStr) == aLocalizedStyleNames.end() ) + aLocalizedStyleNames[aMsgStr] = 1; + else { + aLocalizedStyleNames[aMsgStr]++; + bRepeated = true; + } + } + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt().startsWith("STR_POOLNUMRULE") ) + { + const OString& aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedNumStyleNames.find(aMsgStr) == aLocalizedNumStyleNames.end() ) + aLocalizedNumStyleNames[aMsgStr] = 1; + else { + aLocalizedNumStyleNames[aMsgStr]++; + bRepeated = true; + } + } + if (bRepeated) + 
repeatedEntries.push_back(aPoEntry); + } + aPoInput.close(); + + for (auto const& localizedStyleName : aLocalizedStyleNames) + { + if( localizedStyleName.second > 1 ) + { + std::cout << "ERROR: Style name translations must be unique in:\n" << + aPoPath << "\nLanguage: " << aLanguage << "\nDuplicated translation is: " << localizedStyleName.first << + "\nSee STR_POOLCOLL_*\n\n"; + } + } + for (auto const& localizedNumStyleName : aLocalizedNumStyleNames) + { + if( localizedNumStyleName.second > 1 ) + { + std::cout << "ERROR: Style name translations must be unique in:\n" << + aPoPath << "\nLanguage: " << aLanguage << "\nDuplicated translation is: " << localizedNumStyleName.first << + "\nSee STR_POOLNUMRULE_*\n\n"; + } + } + OString sPoHdrMsg; + aPoInput.open(aPoPath, sPoHdrMsg); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + PoOfstream aPoOutput; + aPoOutput.open(aPoPath+".new"); + PoHeader aTmp("sw/inc", sPoHdrMsg); + aPoOutput.writeHeader(aTmp); + bool bAnyError = false; + + for(;;) + { + PoEntry aPoEntry; + bool bError = false; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + for (auto const& repeatedEntry : repeatedEntries) + { + if (repeatedEntry.getMsgId() == aPoEntry.getMsgId() && repeatedEntry.getMsgCtxt() == aPoEntry.getMsgCtxt()) { + bError = true; + break; + } + } + if (bError) { + bAnyError = true; + } else { + aPoOutput.writeEntry(aPoEntry); + } + } + aPoInput.close(); + aPoOutput.close(); + OUString aPoPathURL; + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPath, RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bAnyError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); +} + +// Translated spreadsheet function names must be unique +static void checkFunctionNames(const OString& aLanguage) +{ + std::map<OString,sal_uInt16> aLocalizedFunctionNames; + std::map<OString,sal_uInt16> aLocalizedCoreFunctionNames; + + 
std::vector<PoEntry> repeatedEntries; + + OString aPoPaths[2]; + OUString aPoPathURL; + + aPoPaths[0] = getenv("SRC_ROOT") + + OStringLiteral("/translations/source/") + + aLanguage + + "/formula/messages.po"; + PoIfstream aPoInput; + OString sPoHdrMsg; + aPoInput.open(aPoPaths[0], sPoHdrMsg); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPaths[0] << std::endl; + return; + } + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt() == "RID_STRLIST_FUNCTION_NAMES" ) + { + const OString& aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedCoreFunctionNames.find(aMsgStr) == aLocalizedCoreFunctionNames.end() ) + aLocalizedCoreFunctionNames[aMsgStr] = 1; + if( aLocalizedFunctionNames.find(aMsgStr) == aLocalizedFunctionNames.end() ) { + aLocalizedFunctionNames[aMsgStr] = 1; + } else { + aLocalizedFunctionNames[aMsgStr]++; + repeatedEntries.push_back(aPoEntry); + } + } + } + aPoInput.close(); + + aPoPaths[1] = getenv("SRC_ROOT") + + OStringLiteral("/translations/source/") + + aLanguage + + "/scaddins/messages.po"; + aPoInput.open(aPoPaths[1]); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPaths[1] << std::endl; + return; + } + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgCtxt().startsWith("ANALYSIS_FUNCNAME") ) + { + OString aMsgStr = aPoEntry.getMsgStr(); + if( aMsgStr.isEmpty() ) + continue; + if( aLocalizedCoreFunctionNames.find(aMsgStr) != aLocalizedCoreFunctionNames.end() ) + aMsgStr += "_ADD"; + if( aLocalizedFunctionNames.find(aMsgStr) == aLocalizedFunctionNames.end() ) { + aLocalizedFunctionNames[aMsgStr] = 1; + } else { + aLocalizedFunctionNames[aMsgStr]++; + repeatedEntries.push_back(aPoEntry); + } + } + } + aPoInput.close(); + + for (auto const& localizedFunctionName : 
aLocalizedFunctionNames) + { + if( localizedFunctionName.second > 1 ) + { + std::cout + << ("ERROR: Spreadsheet function name translations must be" + " unique.\nLanguage: ") + << aLanguage << "\nDuplicated translation is: " << localizedFunctionName.first + << "\n\n"; + } + } + + for (int i=0;i<2;i++) + { + aPoInput.open(aPoPaths[i]); + if( !aPoInput.isOpen() ) + std::cerr << "Warning: Cannot open " << aPoPaths[i] << std::endl; + PoOfstream aPoOutput; + aPoOutput.open(aPoPaths[i]+".new"); + + switch (i) + { + case 0: + { + PoHeader hd("formula/inc", sPoHdrMsg); + aPoOutput.writeHeader(hd); + break; + } + case 1: + { + PoHeader hd("scaddins/inc", sPoHdrMsg); + aPoOutput.writeHeader(hd); + break; + } + } + bool bAnyError = false; + + for(;;) + { + PoEntry aPoEntry; + bool bError = false; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + for (auto const& repeatedEntry : repeatedEntries) + { + if (repeatedEntry.getMsgId() == aPoEntry.getMsgId() && repeatedEntry.getMsgCtxt() == aPoEntry.getMsgCtxt()) + { + bError = true; + break; + } + } + if (bError) + { + bAnyError = true; + } + else + { + aPoOutput.writeEntry(aPoEntry); + } + } + aPoInput.close(); + aPoOutput.close(); + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPaths[i], RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bAnyError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); + } +} + +// In instsetoo_native/inc_openoffice/windows/msi_languages.po +// where an en-US string ends with '|', translation must end +// with '|', too. 
+static void checkVerticalBar(const OString& aLanguage) +{ + OString aPoPath = getenv("SRC_ROOT") + + OStringLiteral("/translations/source/") + + aLanguage + + "/instsetoo_native/inc_openoffice/windows/msi_languages.po"; + PoIfstream aPoInput; + aPoInput.open(aPoPath); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + PoOfstream aPoOutput; + aPoOutput.open(aPoPath+".new"); + PoHeader aTmp("instsetoo_native/inc_openoffice/windows/msi_languages"); + aPoOutput.writeHeader(aTmp); + bool bError = false; + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getMsgId().endsWith("|") && + !aPoEntry.getMsgStr().isEmpty() && !aPoEntry.getMsgStr().endsWith("|") ) + { + std::cout + << ("ERROR: Missing '|' character at the end of translated" + " string.\nIt causes runtime error in installer.\nFile: ") + << aPoPath << std::endl + << "Language: " << aLanguage << std::endl + << "English: " << aPoEntry.getMsgId() << std::endl + << "Localized: " << aPoEntry.getMsgStr() << std::endl + << std::endl; + bError = true; + } + else + aPoOutput.writeEntry(aPoEntry); + } + aPoInput.close(); + aPoOutput.close(); + OUString aPoPathURL; + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPath, RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); +} + +// In starmath/source.po Math symbol names (from symbol.src) +// must not contain spaces +static void checkMathSymbolNames(const OString& aLanguage) +{ + OString aPoPath = getenv("SRC_ROOT") + + OStringLiteral("/translations/source/") + + aLanguage + + "/starmath/messages.po"; + PoIfstream aPoInput; + aPoInput.open(aPoPath); + if( !aPoInput.isOpen() ) + { + std::cerr << "Warning: Cannot open " << aPoPath << std::endl; + return; + } + PoOfstream aPoOutput; + aPoOutput.open(aPoPath+".new"); + 
PoHeader aTmp("starmath/inc"); + aPoOutput.writeHeader(aTmp); + bool bError = false; + + for(;;) + { + PoEntry aPoEntry; + aPoInput.readEntry(aPoEntry); + if( aPoInput.eof() ) + break; + if( !aPoEntry.isFuzzy() && aPoEntry.getGroupId() == "RID_UI_SYMBOL_NAMES" && + !aPoEntry.getMsgStr().isEmpty() && (aPoEntry.getMsgStr().indexOf(" ") != -1) ) + { + std::cout + << "ERROR: Math symbol names must not contain spaces.\nFile: " + << aPoPath << std::endl + << "Language: " << aLanguage << std::endl + << "English: " << aPoEntry.getMsgId() << std::endl + << "Localized: " << aPoEntry.getMsgStr() << std::endl + << std::endl; + bError = true; + } + else + aPoOutput.writeEntry(aPoEntry); + } + aPoInput.close(); + aPoOutput.close(); + OUString aPoPathURL; + osl::FileBase::getFileURLFromSystemPath(OStringToOUString(aPoPath, RTL_TEXTENCODING_UTF8), aPoPathURL); + if( bError ) + osl::File::move(aPoPathURL + ".new", aPoPathURL); + else + osl::File::remove(aPoPathURL + ".new"); +} + +int main() +{ + try + { + OString aLanguages(getenv("ALL_LANGS")); + if( aLanguages.isEmpty() ) + { + std::cerr << "Usage: LD_LIBRARY_PATH=instdir/program make cmd cmd=workdir/LinkTarget/Executable/pocheck\n"; + return 1; + } + for(sal_Int32 i = 1;;++i) // skip en-US + { + OString aLanguage = aLanguages.getToken(i,' '); + if( aLanguage.isEmpty() ) + break; + if( aLanguage == "qtz" ) + continue; + checkStyleNames(aLanguage); + checkFunctionNames(aLanguage); + checkVerticalBar(aLanguage); + checkMathSymbolNames(aLanguage); + } + return 0; + } + catch (std::exception& e) + { + std::cerr << "pocheck: exception " << e.what() << std::endl; + return 1; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/propex.cxx b/l10ntools/source/propex.cxx new file mode 100644 index 000000000..32d2e3d3c --- /dev/null +++ b/l10ntools/source/propex.cxx @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the 
LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/main.h> + +#include <common.hxx> +#include <propmerge.hxx> + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + common::HandledArgs aArgs; + if( !common::handleArguments(argc, argv, aArgs) ) + { + common::writeUsage("propex","*.properties"); + return 1; + } + + PropParser aParser( + aArgs.m_sInputFile, aArgs.m_sLanguage, aArgs.m_bMergeMode ); + if( !aParser.isInitialized() ) + { + return 1; + } + if( aArgs.m_bMergeMode ) + { + aParser.Merge( aArgs.m_sMergeSrc, aArgs.m_sOutputFile ); + } + else + { + aParser.Extract( aArgs.m_sOutputFile ); + } + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/propmerge.cxx b/l10ntools/source/propmerge.cxx new file mode 100644 index 000000000..42945aee3 --- /dev/null +++ b/l10ntools/source/propmerge.cxx @@ -0,0 +1,229 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <memory> +#include <cstdlib> +#include <cassert> +#include <iostream> +#include <fstream> +#include <iomanip> + +#include <export.hxx> +#include <common.hxx> +#include <propmerge.hxx> + +namespace +{ + //Find ascii escaped unicode + sal_Int32 lcl_IndexOfUnicode( + const OString& rSource, const sal_Int32 nFrom = 0 ) + { + const OString sHexDigits = "0123456789abcdefABCDEF"; + sal_Int32 nIndex = rSource.indexOf( "\\u", nFrom ); + if( nIndex == -1 ) + { + return -1; + } + bool bIsUnicode = true; + for( short nDist = 2; nDist <= 5; ++nDist ) + { + if( sHexDigits.indexOf( rSource[nIndex + nDist] ) == -1 ) + { + bIsUnicode = false; + } + } + return bIsUnicode ? nIndex : -1; + } + + //Convert ascii escaped unicode to utf-8 + OString lcl_ConvertToUTF8( const OString& rText ) + { + OString sResult = rText; + sal_Int32 nIndex = lcl_IndexOfUnicode( sResult ); + while( nIndex != -1 && nIndex < rText.getLength() ) + { + const OString sHex = sResult.copy( nIndex + 2, 4 ); + const sal_Unicode cDec = + static_cast<sal_Unicode>( strtol( sHex.getStr(), nullptr, 16 ) ); + const OString sNewChar( &cDec, 1, RTL_TEXTENCODING_UTF8 ); + sResult = sResult.replaceAll( "\\u" + sHex, sNewChar ); + nIndex = lcl_IndexOfUnicode( sResult, nIndex ); + } + return sResult; + } + + //Escape unicode characters + void lcl_PrintJavaStyle( const OString& rText, std::ofstream &rOfstream ) + { + const OUString sTemp = + OStringToOUString( rText, RTL_TEXTENCODING_UTF8 ); + for ( sal_Int32 nIndex = 0; nIndex < sTemp.getLength(); ++nIndex ) + { + sal_Unicode cUniCode = sTemp[nIndex]; + if( cUniCode < 128 ) + { + rOfstream << static_cast<char>( cUniCode ); + } + else + { + rOfstream + << "\\u" + << std::setfill('0') << std::setw(2) << std::uppercase + << std::hex << (cUniCode >> 8) + << std::setfill('0') << std::setw(2) << std::uppercase + << std::hex << (cUniCode & 0xFF); + } + } + } +} + +//Open source file and store its lines +PropParser::PropParser( + const OString& rInputFile, const 
OString& rLang, + const bool bMergeMode ) + : m_vLines( std::vector<OString>() ) + , m_sSource( rInputFile ) + , m_sLang( rLang ) + , m_bIsInitialized( false ) +{ + std::ifstream aIfstream( m_sSource.getStr() ); + if( aIfstream.is_open() ) + { + std::string s; + std::getline( aIfstream, s ); + while( !aIfstream.eof() ) + { + OString sLine( s.data(), s.length() ); + if( bMergeMode || + ( !sLine.startsWith(" *") && !sLine.startsWith("/*") ) ) + { + m_vLines.push_back( sLine ); + } + std::getline( aIfstream, s ); + } + } + else + { + std::cerr + << "Propex error: Cannot open source file: " + << m_sSource << std::endl; + return; + } + m_bIsInitialized = true; +} + +PropParser::~PropParser() +{ +} + +//Extract strings form source file +void PropParser::Extract( const OString& rPOFile ) +{ + assert( m_bIsInitialized ); + PoOfstream aPOStream( rPOFile, PoOfstream::APP ); + if( !aPOStream.isOpen() ) + { + std::cerr + << "Propex error: Cannot open pofile for extract: " + << rPOFile << std::endl; + return; + } + + for( size_t nIndex = 0; nIndex < m_vLines.size(); ++nIndex ) + { + const OString sLine = m_vLines[nIndex]; + const sal_Int32 nEqualSign = sLine.indexOf('='); + if( nEqualSign != -1 ) + { + OString sID = sLine.copy( 0, nEqualSign ).trim(); + OString sText = lcl_ConvertToUTF8( sLine.copy( nEqualSign + 1 ).trim() ); + + common::writePoEntry( + "Propex", aPOStream, m_sSource, "property", + sID, OString(), OString(), sText); + } + } + + aPOStream.close(); +} + +//Merge strings to source file +void PropParser::Merge( const OString &rMergeSrc, const OString &rDestinationFile ) +{ + assert( m_bIsInitialized ); + std::ofstream aDestination( + rDestinationFile.getStr(), std::ios_base::out | std::ios_base::trunc ); + if( !aDestination.is_open() ) { + std::cerr + << "Propex error: Cannot open source file for merge: " + << rDestinationFile << std::endl; + return; + } + + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( m_sLang != "qtz" ) + { + pMergeDataFile.reset( new 
MergeDataFile( rMergeSrc, m_sSource, false, false ) ); + + const std::vector<OString> vLanguages = pMergeDataFile->GetLanguages(); + if( !vLanguages.empty() && vLanguages[0] != m_sLang ) + { + std::cerr + << ("Propex error: given language conflicts with language of" + " Mergedata file: ") + << m_sLang << " - " + << vLanguages[0] << std::endl; + return; + } + } + + for( size_t nIndex = 0; nIndex < m_vLines.size(); ++nIndex ) + { + const OString sLine = m_vLines[nIndex]; + const sal_Int32 nEqualSign = sLine.indexOf('='); + if( !sLine.startsWith(" *") && !sLine.startsWith("/*") && + nEqualSign != -1 ) + { + const OString sID( sLine.copy( 0, sLine.indexOf('=') ).trim() ); + ResData aResData( sID, m_sSource ); + aResData.sResTyp = "property"; + OString sNewText; + if( m_sLang == "qtz" ) + { + const OString sOriginText = lcl_ConvertToUTF8(sLine.copy( nEqualSign + 1 ).trim()); + sNewText = MergeEntrys::GetQTZText(aResData, sOriginText); + } + else if( pMergeDataFile ) + { + MergeEntrys* pEntrys = pMergeDataFile->GetMergeEntrys( &aResData ); + if( pEntrys ) + { + pEntrys->GetText( sNewText, m_sLang ); + } + } + if( !sNewText.isEmpty() ) + { + aDestination << OString(sID + "="); + lcl_PrintJavaStyle( sNewText, aDestination ); + aDestination << std::endl; + } + else + { + aDestination << sLine << std::endl; + } + } + else + { + aDestination << sLine << std::endl; + } + } + aDestination.close(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/src_yy_wrapper.c b/l10ntools/source/src_yy_wrapper.c new file mode 100644 index 000000000..3fdcc392b --- /dev/null +++ b/l10ntools/source/src_yy_wrapper.c @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

// Helper to suppress warnings in lex generated c code, see #i57362#
// The generated scanner is included textually, so its functions are
// visible in this translation unit.
#include "src_yy.c"

// Storing the addresses of yyunput and yy_flex_strlen in global function
// pointers makes both functions count as used.
void (*avoid_unused_yyunput_in_src_yy_c)() = yyunput;
int (*avoid_unused_yy_flex_strlen_in_src_yy_c)() = yy_flex_strlen;

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/l10ntools/source/treemerge.cxx b/l10ntools/source/treemerge.cxx new file mode 100644 index 000000000..b6d246317 --- /dev/null +++ b/l10ntools/source/treemerge.cxx @@ -0,0 +1,287 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */ + +#include <iostream> +#include <fstream> +#include <cassert> +#include <cstring> + +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/xmlmemory.h> +#include <libxml/xmlstring.h> + +#include <export.hxx> +#include <helper.hxx> +#include <common.hxx> +#include <po.hxx> +#include <treemerge.hxx> + + +namespace +{ + // Extract strings from nodes on all level recursively + void lcl_ExtractLevel( + const xmlDocPtr pSource, const xmlNodePtr pRoot, + const xmlChar* pNodeName, PoOfstream& rPOStream ) + { + if( !pRoot->children ) + { + return; + } + for( xmlNodePtr pCurrent = pRoot->children->next; + pCurrent; pCurrent = pCurrent->next) + { + if (!xmlStrcmp(pCurrent->name, pNodeName)) + { + xmlChar* pID = xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("id")); + xmlChar* pText = + xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("title")); + + common::writePoEntry( + "Treex", rPOStream, pSource->name, helper::xmlStrToOString( pNodeName ), + helper::xmlStrToOString( pID ), OString(), OString(), helper::xmlStrToOString( pText )); + + xmlFree( pID ); + xmlFree( pText ); + + lcl_ExtractLevel( + pSource, pCurrent, reinterpret_cast<const xmlChar *>("node"), + rPOStream ); + } + } + } + + // Update id and content of the topic + xmlNodePtr lcl_UpdateTopic( + const xmlNodePtr pCurrent, const OString& rXhpRoot ) + { + xmlNodePtr pReturn = pCurrent; + xmlChar* pID = xmlGetProp(pReturn, reinterpret_cast<const xmlChar*>("id")); + const OString sID = + helper::xmlStrToOString( pID ); + xmlFree( pID ); + + const sal_Int32 nFirstSlash = sID.indexOf('/'); + // Update id attribute of topic + { + OString sNewID = + sID.copy( 0, nFirstSlash + 1 ) + + rXhpRoot.copy( rXhpRoot.lastIndexOf('/') + 1 ) + + sID.copy( sID.indexOf( '/', nFirstSlash + 1 ) ); + xmlSetProp( + pReturn, reinterpret_cast<const xmlChar*>("id"), + reinterpret_cast<const xmlChar*>(sNewID.getStr())); + } + + const OString sXhpPath = + rXhpRoot + + sID.copy(sID.indexOf('/', nFirstSlash + 1)); 
+ xmlDocPtr pXhpFile = xmlParseFile( sXhpPath.getStr() ); + // if xhpfile is missing than put this topic into comment + if ( !pXhpFile ) + { + xmlNodePtr pTemp = pReturn; + xmlChar* sNewID = + xmlGetProp(pReturn, reinterpret_cast<const xmlChar*>("id")); + xmlChar* sComment = + xmlStrcat( xmlCharStrdup("removed "), sNewID ); + pReturn = xmlNewComment( sComment ); + xmlReplaceNode( pTemp, pReturn ); + xmlFree( pTemp ); + xmlFree( sNewID ); + xmlFree( sComment ); + } + // update topic's content on the basis of xhpfile's title + else + { + xmlNodePtr pXhpNode = xmlDocGetRootElement( pXhpFile ); + for( pXhpNode = pXhpNode->children; + pXhpNode; pXhpNode = pXhpNode->children ) + { + while( pXhpNode->type != XML_ELEMENT_NODE ) + { + pXhpNode = pXhpNode->next; + } + if(!xmlStrcmp(pXhpNode->name, reinterpret_cast<const xmlChar *>("title"))) + { + xmlChar* sTitle = + xmlNodeListGetString(pXhpFile, pXhpNode->children, 1); + OString sNewTitle = + helper::xmlStrToOString( sTitle ). + replaceAll("$[officename]","%PRODUCTNAME"). 
+ replaceAll("$[officeversion]","%PRODUCTVERSION"); + xmlNodeSetContent( + pReturn, + xmlEncodeSpecialChars( nullptr, + reinterpret_cast<const xmlChar*>( + sNewTitle.getStr() ))); + xmlFree( sTitle ); + break; + } + } + if( !pXhpNode ) + { + std::cerr + << "Treex error: Cannot find title in " + << sXhpPath << std::endl; + return nullptr; + } + xmlFree( pXhpFile ); + xmlCleanupParser(); + } + return pReturn; + } + // Localize title attribute of help_section and node tags + void lcl_MergeLevel( + xmlDocPtr io_pSource, const xmlNodePtr pRoot, + const xmlChar * pNodeName, MergeDataFile* pMergeDataFile, + const OString& rLang, const OString& rXhpRoot ) + { + if( !pRoot->children ) + { + return; + } + for( xmlNodePtr pCurrent = pRoot->children; + pCurrent; pCurrent = pCurrent->next) + { + if( !xmlStrcmp(pCurrent->name, pNodeName) ) + { + if( rLang != "en-US" ) + { + OString sNewText; + xmlChar* pID = xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("id")); + ResData aResData( + helper::xmlStrToOString( pID ), + static_cast<OString>(io_pSource->name) ); + xmlFree( pID ); + aResData.sResTyp = helper::xmlStrToOString( pNodeName ); + if( pMergeDataFile ) + { + MergeEntrys* pEntrys = + pMergeDataFile->GetMergeEntrys( &aResData ); + if( pEntrys ) + { + pEntrys->GetText( sNewText, rLang ); + } + } + else if( rLang == "qtz" ) + { + xmlChar* pText = xmlGetProp(pCurrent, reinterpret_cast<const xmlChar*>("title")); + const OString sOriginText = helper::xmlStrToOString(pText); + xmlFree( pText ); + sNewText = MergeEntrys::GetQTZText(aResData, sOriginText); + } + if( !sNewText.isEmpty() ) + { + xmlSetProp( + pCurrent, reinterpret_cast<const xmlChar*>("title"), + reinterpret_cast<const xmlChar*>(sNewText.getStr())); + } + } + + lcl_MergeLevel( + io_pSource, pCurrent, reinterpret_cast<const xmlChar *>("node"), + pMergeDataFile, rLang, rXhpRoot ); + } + else if( !xmlStrcmp(pCurrent->name, reinterpret_cast<const xmlChar *>("topic")) ) + { + pCurrent = lcl_UpdateTopic( pCurrent, 
rXhpRoot ); + } + } + } +} + +TreeParser::TreeParser( + const OString& rInputFile, const OString& rLang ) + : m_pSource( nullptr ) + , m_sLang( rLang ) + , m_bIsInitialized( false ) +{ + m_pSource = xmlParseFile( rInputFile.getStr() ); + if ( !m_pSource ) { + std::cerr + << "Treex error: Cannot open source file: " + << rInputFile << std::endl; + return; + } + if( !m_pSource->name ) + { + m_pSource->name = static_cast<char *>(xmlMalloc(strlen(rInputFile.getStr())+1)); + strcpy( m_pSource->name, rInputFile.getStr() ); + } + m_bIsInitialized = true; +} + +TreeParser::~TreeParser() +{ + // be sure m_pSource is freed + if (m_bIsInitialized) + xmlFreeDoc( m_pSource ); +} + +void TreeParser::Extract( const OString& rPOFile ) +{ + assert( m_bIsInitialized ); + PoOfstream aPOStream( rPOFile, PoOfstream::APP ); + if( !aPOStream.isOpen() ) + { + std::cerr + << "Treex error: Cannot open po file for extract: " + << rPOFile << std::endl; + return; + } + + xmlNodePtr pRootNode = xmlDocGetRootElement( m_pSource ); + lcl_ExtractLevel( + m_pSource, pRootNode, reinterpret_cast<const xmlChar *>("help_section"), + aPOStream ); + + xmlFreeDoc( m_pSource ); + xmlCleanupParser(); + aPOStream.close(); + m_bIsInitialized = false; +} + +void TreeParser::Merge( + const OString &rMergeSrc, const OString &rDestinationFile, + const OString &rXhpRoot ) +{ + assert( m_bIsInitialized ); + + const xmlNodePtr pRootNode = xmlDocGetRootElement( m_pSource ); + std::unique_ptr<MergeDataFile> pMergeDataFile; + if( m_sLang != "qtz" && m_sLang != "en-US" ) + { + pMergeDataFile.reset(new MergeDataFile( + rMergeSrc, static_cast<OString>( m_pSource->name ), false, false )); + const std::vector<OString> vLanguages = pMergeDataFile->GetLanguages(); + if( !vLanguages.empty() && vLanguages[0] != m_sLang ) + { + std::cerr + << ("Treex error: given language conflicts with language of" + " Mergedata file: ") + << m_sLang << " - " + << vLanguages[0] << std::endl; + return; + } + } + lcl_MergeLevel( + m_pSource, 
pRootNode, reinterpret_cast<const xmlChar *>("help_section"), + pMergeDataFile.get(), m_sLang, rXhpRoot ); + + pMergeDataFile.reset(); + xmlSaveFile( rDestinationFile.getStr(), m_pSource ); + xmlFreeDoc( m_pSource ); + xmlCleanupParser(); + m_bIsInitialized = false; +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/treex.cxx b/l10ntools/source/treex.cxx new file mode 100644 index 000000000..4e8fe0b26 --- /dev/null +++ b/l10ntools/source/treex.cxx @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <iostream> +#include <cstring> +#include <sal/main.h> + +#include <common.hxx> +#include <treemerge.hxx> + +static void WriteUsage() +{ + std::cout + << ("Syntax: Treex [-r Root] -i FileIn -o FileOut" + " [-m DataBase] [-l Lang]\n" + " Root: Path to root of localized xhp files\n" + " FileIn: Source files (*.tree)\n" + " FileOut: Destination file (*.*)\n" + " DataBase: Mergedata (*.po)\n" + " Lang: Restrict the handled languages; one element of\n" + " (de, en-US, ...) 
or all\n"); +} + + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) +{ + OString sXHPRoot; + for (int nIndex = 1; nIndex != argc; ++nIndex) + { + if (std::strcmp(argv[nIndex], "-r") == 0) + { + sXHPRoot = OString( argv[nIndex + 1] ); + for( int nIndex2 = nIndex+3; nIndex2 < argc; nIndex2 = nIndex2 + 2 ) + { + argv[nIndex-3] = argv[nIndex-1]; + argv[nIndex-2] = argv[nIndex]; + } + argc = argc - 2; + break; + } + } + common::HandledArgs aArgs; + if( !common::handleArguments(argc, argv, aArgs) ) + { + WriteUsage(); + return 1; + } + + TreeParser aParser(aArgs.m_sInputFile, aArgs.m_sLanguage ); + if( !aParser.isInitialized() ) + { + return 1; + } + + if( aArgs.m_bMergeMode || !sXHPRoot.isEmpty() ) + { + aParser.Merge( aArgs.m_sMergeSrc, aArgs.m_sOutputFile, sXHPRoot ); + } + else + { + aParser.Extract( aArgs.m_sOutputFile ); + } + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/ulfconv/msi-encodinglist.txt b/l10ntools/source/ulfconv/msi-encodinglist.txt new file mode 100644 index 000000000..eaa1754cf --- /dev/null +++ b/l10ntools/source/ulfconv/msi-encodinglist.txt @@ -0,0 +1,180 @@ +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+# + +# Syntax: language 0 LCID +# 2nd field used to be the ANSI codepage, +# but now we are using UTF-8 everywhere. +# comment lines begin with hash +af 0 1078 # Afrikaans +am 0 1118 # Amharic +ar 0 1025 +ar-SA 0 1025 +as 0 1101 # Assamese +as-IN 0 1101 # Assamese +ast 0 1610 +be 0 1059 # Belarusian +be-BY 0 1059 +bg 0 1026 # Bulgarian +bn 0 2117 # Bengali +bn-BD 0 2117 # Bengali Bangladesh +bn-IN 0 1093 # Bengali India +bo 0 2121 +br 0 1150 # Breton +brx 0 1603 # Bodo (India) +bs 0 5146 # bosnian +ca 0 1027 # Catalan +ca-valencia 0 2051 # Catalan Valencian +ckb 0 1170 # Central Kurdish (Sorani) +cs 0 1029 # Czech +cy 0 1106 # Welsh +da 0 1030 +de 0 1031 +dgo 0 1604 # Dogri (India) +dsb 0 2094 # Lower Sorbian +dz 0 3153 # Dzongkha +el 0 1032 +en-GB 0 2057 +en-US 0 1033 +en-ZA 0 7177 +eo 0 1553 # Esperanto +es 0 1034 +et 0 1061 +eu 0 1069 # Basque +fa 0 1065 # Farsi +fi 0 1035 +fo 0 1080 # Faroese +fr 0 1036 +fr-CA 0 3084 +fy 0 1122 # Frisian +fur 0 1585 +ga 0 2108 # Irish +gd 0 1169 # Gaelic (Scotland) +gl 0 1110 # Galician +gu 0 1095 # Gujarati +gu-IN 0 1095 # Gujarati +gug 0 1140 # Guarani - Paraguay +he 0 1037 +hi 0 1081 +hr 0 1050 # Croatian +ht 0 1626 # Haitian +hu 0 1038 +hsb 0 1070 # Upper Sorbian +hy 0 1067 # Armenian +id 0 1057 # Indonesian +is 0 1039 # Icelandic +it 0 1040 +ja 0 1041 +jbo 0 1624 +ka 0 1079 # Georgian +kab 0 1625 # Kabyle +kk 0 1087 +km 0 1107 # Khmer +kmr-Latn 0 1574 +kn 0 1099 # Kannada +kn-IN 0 1099 # Kannada +ko 0 1042 +kok 0 1111 # Konkani +ks 0 1120 # Kashmiri +ky 0 1088 # Kyrgyz +ky-CN 0 1640 # Kyrgyz (China) +lb 0 1134 +lo 0 1108 # Lao +lt 0 1063 # Lithuanian +lv 0 1062 # Latvian +mai 0 1605 # Maithili (India) +mk 0 1071 # Macedonian +ml 0 1100 +ml-IN 0 1100 +mn 0 1104 # Mongolian +mni 0 1112 # Manipuri +mn-TR 0 2128 # Mongolian Classical/traditional +mr 0 1102 # Marathi +mr-IN 0 1102 +ms 0 1086 # Malay (Malaysian) +mt 0 1082 # Maltese +my 0 1109 # Burmese +nb 0 1044 +ne 0 1121 # Nepali +nl 0 1043 +nn 0 2068 +no 0 1044 +nr 0 1580 
# Ndebele South +nso 0 1132 +ny 0 1598 +oc 0 1154 # Occitan-lengadocian +om 0 1138 # Oromo +or 0 1096 # Odia +or-IN 0 1096 +pa-IN 0 1094 # Punjabi +pap 0 2171 +pl 0 1045 +ps 0 2171 +pt 0 2070 +pt-BR 0 1046 +pt-PT 0 2070 +qtz 0 1638 # key id pseudo language +rm 0 1047 # Raeto-Romance +ro 0 1048 # Romanian +ru 0 1049 +rw 0 1159 # Kinyarwanda +sa-IN 0 1103 # Sanskrit +sat 0 1606 # Santali +sb 0 1070 # Sorbian +sc 0 3047 +sd 0 1113 # Sindhi +si 0 1115 # Sinhala +sid 0 1669 # Sidama, fake LCID +sk 0 1051 # Slovak +sl 0 1060 # Slovenian +sq 0 1052 # Albanian +sr 0 3098 # Serbian Cyrillic +sr-Latn 0 2074 # Serbian Latin +sr-SP 0 3098 # Serbian Cyrillic +ss 0 1579 # Swazi +st 0 1072 # Southern Sotho, Sutu +sv 0 1053 +sw 0 1089 # Swahili +sw-TZ 0 1089 # Swahili +szl 0 1689 # Silesian +so 0 1143 +ta 0 1097 # Tamil +ta-IN 0 1097 # Tamil +te 0 1098 +te-IN 0 1098 +tg 0 1064 # Tajik +th 0 1054 +ti 0 1139 # Tigrinya +ti-ER 0 1139 # Tigrinya +tn 0 1074 # Setsuana +tr 0 1055 # Turkish +ts 0 1073 # Tsonga +tk 0 1090 +tt 0 1092 # Tatar +ug 0 1152 +uk 0 1058 # Ukrainian +ur 0 1056 # Urdu +ur-IN 0 2080 +uz 0 1091 # Uzbek (Latin) +ve 0 1075 # Venda +vec 0 1685 # Venetian +vi 0 1066 # Vietnamese +xh 0 1076 # Xhosa +yi 0 1085 # Yiddish +zh-CN 0 2052 +zh-TW 0 1028 +zu 0 1077 # Zulu diff --git a/l10ntools/source/xmlparse.cxx b/l10ntools/source/xmlparse.cxx new file mode 100644 index 000000000..54b9c6bf4 --- /dev/null +++ b/l10ntools/source/xmlparse.cxx @@ -0,0 +1,1119 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#include <sal/config.h> + +#include <iterator> /* std::iterator*/ + +#include <cassert> +#include <stdio.h> + +#include <helper.hxx> +#include <common.hxx> +#include <xmlparse.hxx> +#include <fstream> +#include <iostream> +#include <osl/thread.hxx> +#include <osl/process.h> +#include <rtl/strbuf.hxx> +#include <unicode/regex.h> + +using namespace std; +using namespace osl; + +#define XML_LANG "xml-lang" +#define ID "id" + + + + +XMLChildNode::XMLChildNode( XMLParentNode *pPar ) + : m_pParent( pPar ) +{ + if ( m_pParent ) + m_pParent->AddChild( this ); +} + + +XMLChildNode::XMLChildNode( const XMLChildNode& rObj) + : XMLNode(rObj), + m_pParent(rObj.m_pParent) +{ +} + +XMLChildNode& XMLChildNode::operator=(const XMLChildNode& rObj) +{ + if(this != &rObj) + { + m_pParent=rObj.m_pParent; + } + return *this; +} + + + + +XMLParentNode::~XMLParentNode() +{ + if( m_pChildList ) + { + RemoveAndDeleteAllChildren(); + } +} + +XMLParentNode::XMLParentNode( const XMLParentNode& rObj) +: XMLChildNode( rObj ) +{ + if( !rObj.m_pChildList ) + return; + + m_pChildList.reset( new XMLChildNodeList ); + for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ ) + { + XMLChildNode* pNode = (*rObj.m_pChildList)[ i ]; + if( pNode != nullptr) + { + switch(pNode->GetNodeType()) + { + case XMLNodeType::ELEMENT: + AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break; + case XMLNodeType::DATA: + AddChild( new XMLData ( 
*static_cast<XMLData* > (pNode) ) ); break; + case XMLNodeType::COMMENT: + AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break; + case XMLNodeType::DEFAULT: + AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break; + default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj"); + } + } + } +} + +XMLParentNode& XMLParentNode::operator=(const XMLParentNode& rObj) +{ + if(this!=&rObj) + { + XMLChildNode::operator=(rObj); + if( m_pChildList ) + { + RemoveAndDeleteAllChildren(); + } + if( rObj.m_pChildList ) + { + m_pChildList.reset( new XMLChildNodeList ); + for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ ) + AddChild( (*rObj.m_pChildList)[ i ] ); + } + else + m_pChildList.reset(); + + } + return *this; +} +void XMLParentNode::AddChild( XMLChildNode *pChild ) +{ + if ( !m_pChildList ) + m_pChildList.reset( new XMLChildNodeList ); + m_pChildList->push_back( pChild ); +} + +void XMLParentNode::RemoveAndDeleteAllChildren() +{ + if ( m_pChildList ) + { + for ( size_t i = 0; i < m_pChildList->size(); i++ ) + delete (*m_pChildList)[ i ]; + m_pChildList->clear(); + } +} + + + + +void XMLFile::Write( OString const &aFilename ) +{ + std::ofstream s( + aFilename.getStr(), std::ios_base::out | std::ios_base::trunc); + if (!s.is_open()) + { + std::cerr + << "Error: helpex cannot create file " << aFilename + << '\n'; + std::exit(EXIT_FAILURE); + } + Write(s); + s.close(); +} + +void XMLFile::Write( ofstream &rStream , XMLNode *pCur ) +{ + if ( !pCur ) + Write( rStream, this ); + else { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + Write( rStream, (*GetChildList())[ i ] ); + } + break; + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + rStream << "<"; + rStream << pElement->GetName(); + if ( pElement->GetAttributeList()) + for ( size_t j = 0; j < 
pElement->GetAttributeList()->size(); j++ ) + { + rStream << " "; + OString sData( (*pElement->GetAttributeList())[ j ]->GetName() ); + rStream << XMLUtil::QuotHTML( sData ); + rStream << "=\""; + sData = (*pElement->GetAttributeList())[ j ]->GetValue(); + rStream << XMLUtil::QuotHTML( sData ); + rStream << "\""; + } + if ( !pElement->GetChildList()) + rStream << "/>"; + else + { + rStream << ">"; + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + Write( rStream, (*pElement->GetChildList())[ k ] ); + rStream << "</"; + rStream << pElement->GetName(); + rStream << ">"; + } + } + break; + case XMLNodeType::DATA: + { + OString sData( static_cast<const XMLData*>(pCur)->GetData()); + rStream << XMLUtil::QuotHTML( sData ); + } + break; + case XMLNodeType::COMMENT: + { + const XMLComment *pComment = static_cast<const XMLComment*>(pCur); + rStream << "<!--"; + rStream << pComment->GetComment(); + rStream << "-->"; + } + break; + case XMLNodeType::DEFAULT: + { + const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur); + rStream << pDefault->GetDefault(); + } + break; + } + } +} + +void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel ) +{ + if ( !pCur ) + Print( this ); + else + { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + Print( (*GetChildList())[ i ] ); + } + break; + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + + fprintf( stdout, "<%s", pElement->GetName().getStr()); + if ( pElement->GetAttributeList()) + { + for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j) + { + const OString aAttrName((*pElement->GetAttributeList())[j]->GetName()); + if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG)) + { + fprintf( stdout, " %s=\"%s\"", + aAttrName.getStr(), + (*pElement->GetAttributeList())[ j ]->GetValue().getStr()); + } + } + } + if ( !pElement->GetChildList()) + fprintf( stdout, "/>" ); + else + 
{ + fprintf( stdout, ">" ); + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + Print( (*pElement->GetChildList())[ k ], nLevel + 1 ); + fprintf( stdout, "</%s>", pElement->GetName().getStr()); + } + } + break; + case XMLNodeType::DATA: + { + const XMLData *pData = static_cast<const XMLData*>(pCur); + fprintf( stdout, "%s", pData->GetData().getStr()); + } + break; + case XMLNodeType::COMMENT: + { + const XMLComment *pComment = static_cast<const XMLComment*>(pCur); + fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr()); + } + break; + case XMLNodeType::DEFAULT: + { + const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur); + fprintf( stdout, "%s", pDefault->GetDefault().getStr()); + } + break; + } + } +} +XMLFile::~XMLFile() +{ + if( m_pXMLStrings ) + { + for (auto const& pos : *m_pXMLStrings) + { + delete pos.second; // Check and delete content also ? + } + } +} + +XMLFile::XMLFile( const OString &rFileName ) // the file name, empty if created from memory stream + : XMLParentNode( nullptr ) + , m_sFileName( rFileName ) +{ + m_aNodes_localize.emplace( OString("bookmark") , true ); + m_aNodes_localize.emplace( OString("variable") , true ); + m_aNodes_localize.emplace( OString("paragraph") , true ); + m_aNodes_localize.emplace( OString("h1") , true ); + m_aNodes_localize.emplace( OString("h2") , true ); + m_aNodes_localize.emplace( OString("h3") , true ); + m_aNodes_localize.emplace( OString("h4") , true ); + m_aNodes_localize.emplace( OString("h5") , true ); + m_aNodes_localize.emplace( OString("h6") , true ); + m_aNodes_localize.emplace( OString("note") , true ); + m_aNodes_localize.emplace( OString("tip") , true ); + m_aNodes_localize.emplace( OString("warning") , true ); + m_aNodes_localize.emplace( OString("alt") , true ); + m_aNodes_localize.emplace( OString("caption") , true ); + m_aNodes_localize.emplace( OString("title") , true ); + m_aNodes_localize.emplace( OString("link") , true ); +} + +void XMLFile::Extract() +{ + 
m_pXMLStrings.reset( new XMLHashMap ); + SearchL10NElements( this ); +} + +void XMLFile::InsertL10NElement( XMLElement* pElement ) +{ + OString sId, sLanguage("en-US"); + LangHashMap* pElem; + + if( pElement->GetAttributeList() != nullptr ) + { + for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) + { + const OString sTempStr((*pElement->GetAttributeList())[ j ]->GetName()); + // Get the "id" Attribute + if (sTempStr == ID) + { + sId = (*pElement->GetAttributeList())[ j ]->GetValue(); + } + // Get the "xml-lang" Attribute + if (sTempStr == XML_LANG) + { + sLanguage = (*pElement->GetAttributeList())[j]->GetValue(); + } + + } + } + else + { + fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found"); + fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++"); + Print( pElement ); + fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++"); + } + + XMLHashMap::iterator pos = m_pXMLStrings->find( sId ); + if( pos == m_pXMLStrings->end() ) // No instance, create new one + { + pElem = new LangHashMap; + (*pElem)[ sLanguage ]=pElement; + m_pXMLStrings->emplace( sId , pElem ); + m_vOrder.push_back( sId ); + } + else // Already there + { + pElem=pos->second; + if ( pElem->count(sLanguage) ) + { + fprintf(stdout,"Error: Duplicated entry. 
ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() ); + exit( -1 ); + } + (*pElem)[ sLanguage ]=pElement; + } +} + +XMLFile::XMLFile( const XMLFile& rObj ) + : XMLParentNode( rObj ) + , m_sFileName( rObj.m_sFileName ) +{ + if( this != &rObj ) + { + m_aNodes_localize = rObj.m_aNodes_localize; + m_vOrder = rObj.m_vOrder; + } +} + +XMLFile& XMLFile::operator=(const XMLFile& rObj) +{ + if( this != &rObj ) + { + XMLParentNode::operator=(rObj); + + m_aNodes_localize = rObj.m_aNodes_localize; + m_vOrder = rObj.m_vOrder; + + m_pXMLStrings.reset(); + + if( rObj.m_pXMLStrings ) + { + m_pXMLStrings.reset( new XMLHashMap ); + for (auto const& pos : *rObj.m_pXMLStrings) + { + LangHashMap* pElem=pos.second; + LangHashMap* pNewelem = new LangHashMap; + for (auto const& pos2 : *pElem) + { + (*pNewelem)[ pos2.first ] = new XMLElement( *pos2.second ); + } + (*m_pXMLStrings)[ pos.first ] = pNewelem; + } + } + } + return *this; +} + +void XMLFile::SearchL10NElements( XMLChildNode *pCur ) +{ + if ( !pCur ) + SearchL10NElements( this ); + else + { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + { + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + { + XMLChildNode* pElement = (*GetChildList())[ i ]; + if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) + SearchL10NElements( pElement ); + } + } + } + break; + case XMLNodeType::ELEMENT: + { + bool bInsert = true; + XMLElement *pElement = static_cast<XMLElement*>(pCur); + const OString sName(pElement->GetName().toAsciiLowerCase()); + OString sLanguage("en-US"); + OString sTmpStrVal; + if ( pElement->GetAttributeList()) + { + for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j ) + { + const OString sTmpStr = (*pElement->GetAttributeList())[j]->GetName(); + if (sTmpStr == ID) + { + sTmpStrVal=(*pElement->GetAttributeList())[ j ]->GetValue(); + } + if (sTmpStr == "localize") + { + bInsert=false; + } + if (sTmpStr == 
XML_LANG) // Get the "xml-lang" Attribute + { + sLanguage=(*pElement->GetAttributeList())[ j ]->GetValue(); + } + } + } + + if ( bInsert && ( m_aNodes_localize.find( sName ) != m_aNodes_localize.end() ) ) + InsertL10NElement(pElement); + else if ( bInsert && pElement->GetChildList() ) + { + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + SearchL10NElements( (*pElement->GetChildList())[ k ] ); + } + } + break; + default: + break; + } + } +} + +bool XMLFile::CheckExportStatus( XMLParentNode *pCur ) +{ + static bool bStatusExport = true; + + if ( !pCur ) + CheckExportStatus( this ); + else { + switch( pCur->GetNodeType()) + { + case XMLNodeType::XFILE: + { + if( GetChildList()) + { + for ( size_t i = 0; i < GetChildList()->size(); i++ ) + { + XMLParentNode* pElement = static_cast<XMLParentNode*>((*GetChildList())[ i ]); + if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) CheckExportStatus( pElement );//, i); + } + } + } + break; + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC")) + { + if ( pElement->GetAttributeList()) + { + for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt; ++j) + { + const OString tmpStr((*pElement->GetAttributeList())[j]->GetName()); + if (tmpStr.equalsIgnoreAsciiCase("STATUS")) + { + const OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue()); + if (!tmpStrVal.equalsIgnoreAsciiCase("PUBLISH") && + !tmpStrVal.equalsIgnoreAsciiCase("DEPRECATED")) + { + bStatusExport = false; + } + } + + } + } + } + else if ( pElement->GetChildList() ) + { + for (size_t k = 0; k < pElement->GetChildList()->size(); ++k) + CheckExportStatus( static_cast<XMLParentNode*>((*pElement->GetChildList())[k]) ); + } + } + break; + default: + break; + } + } + return bStatusExport; +} + +XMLElement::XMLElement( + const OString &rName, // the element name + XMLParentNode *pParent // parent node of this element +) + : 
XMLParentNode( pParent ) + , m_sElementName( rName ) +{ +} + +XMLElement::XMLElement(const XMLElement& rObj) + : XMLParentNode( rObj ) + , m_sElementName( rObj.m_sElementName ) +{ + if ( rObj.m_pAttributes ) + { + m_pAttributes.reset( new XMLAttributeList ); + for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ ) + AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() ); + } +} + +XMLElement& XMLElement::operator=(const XMLElement& rObj) +{ + if( this !=& rObj ) + { + XMLParentNode::operator=(rObj); + m_sElementName = rObj.m_sElementName; + + if ( m_pAttributes ) + { + for ( size_t i = 0; i < m_pAttributes->size(); i++ ) + delete (*m_pAttributes)[ i ]; + m_pAttributes.reset(); + } + if ( rObj.m_pAttributes ) + { + m_pAttributes.reset( new XMLAttributeList ); + for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ ) + AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() ); + } + } + return *this; +} + +void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue ) +{ + if ( !m_pAttributes ) + m_pAttributes.reset( new XMLAttributeList ); + m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) ); +} + +void XMLElement::ChangeLanguageTag( const OString &rValue ) +{ + if ( m_pAttributes ) + { + bool bWasSet = false; + for (size_t i = 0; i < m_pAttributes->size(); ++i) + { + if ((*m_pAttributes)[ i ]->GetName() == XML_LANG) + { + (*m_pAttributes)[ i ]->setValue(rValue); + bWasSet = true; + } + } + + if (!bWasSet) + AddAttribute(XML_LANG, rValue); + } + XMLChildNodeList* pCList = GetChildList(); + + if( !pCList ) + return; + + for ( size_t i = 0; i < pCList->size(); i++ ) + { + XMLChildNode* pNode = (*pCList)[ i ]; + if( pNode && pNode->GetNodeType() == XMLNodeType::ELEMENT ) + { + XMLElement* pElem = static_cast< XMLElement* >(pNode); + pElem->ChangeLanguageTag( rValue ); + pElem = nullptr; + pNode = nullptr; + } + } + pCList = nullptr; +} + 
+XMLElement::~XMLElement() +{ + if ( m_pAttributes ) + { + for ( size_t i = 0; i < m_pAttributes->size(); i++ ) + delete (*m_pAttributes)[ i ]; + } +} + +OString XMLElement::ToOString() +{ + OStringBuffer sBuffer; + Print(this,sBuffer,true); + return sBuffer.makeStringAndClear(); +} + +void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const +{ + if( pCur ) + { + if( bRootelement ) + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + if ( pElement->GetAttributeList()) + { + if ( pElement->GetChildList()) + { + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + { + XMLChildNode* pTmp = (*pElement->GetChildList())[ k ]; + Print( pTmp, rBuffer , false); + } + } + } + } + else + { + switch( pCur->GetNodeType()) + { + case XMLNodeType::ELEMENT: + { + XMLElement *pElement = static_cast<XMLElement*>(pCur); + + if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ) + { + rBuffer.append( "<" ); + rBuffer.append( pElement->GetName() ); + if ( pElement->GetAttributeList()) + { + for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) + { + const OString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() ); + if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG)) + { + rBuffer.append( + " " + aAttrName + "=\"" + + (*pElement->GetAttributeList())[ j ]->GetValue() + "\"" ); + } + } + } + if ( !pElement->GetChildList()) + rBuffer.append( "/>" ); + else + { + rBuffer.append( ">" ); + for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ) + { + XMLChildNode* pTmp = (*pElement->GetChildList())[ k ]; + Print( pTmp, rBuffer , false); + } + rBuffer.append( "</" + pElement->GetName() + ">" ); + } + } + } + break; + case XMLNodeType::DATA: + { + const XMLData *pData = static_cast<const XMLData*>(pCur); + rBuffer.append( pData->GetData() ); + } + break; + case XMLNodeType::COMMENT: + { + const XMLComment *pComment = static_cast<const XMLComment*>(pCur); + rBuffer.append( "<!--" + pComment->GetComment() + 
"-->" ); + } + break; + case XMLNodeType::DEFAULT: + { + const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur); + rBuffer.append( pDefault->GetDefault() ); + } + break; + default: + break; + } + } + } + else + { + fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n"); + return; + } +} + + + + +namespace +{ + +OUString lcl_pathnameToAbsoluteUrl(const OString& rPathname) +{ + OUString sPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 ); + OUString sUrl; + if (osl::FileBase::getFileURLFromSystemPath(sPath, sUrl) + != osl::FileBase::E_None) + { + std::cerr << "Error: Cannot convert input pathname to URL\n"; + std::exit(EXIT_FAILURE); + } + OUString sCwd; + if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None) + { + std::cerr << "Error: Cannot determine cwd\n"; + std::exit(EXIT_FAILURE); + } + if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl) + != osl::FileBase::E_None) + { + std::cerr << "Error: Cannot convert input URL to absolute URL\n"; + std::exit(EXIT_FAILURE); + } + return sUrl; +} +} + + +SimpleXMLParser::SimpleXMLParser() + : m_pCurNode(nullptr) + , m_pCurData(nullptr) +{ + m_aParser = XML_ParserCreate( nullptr ); + XML_SetUserData( m_aParser, this ); + XML_SetElementHandler( m_aParser, StartElementHandler, EndElementHandler ); + XML_SetCharacterDataHandler( m_aParser, CharacterDataHandler ); + XML_SetCommentHandler( m_aParser, CommentHandler ); + XML_SetDefaultHandler( m_aParser, DefaultHandler ); +} + +SimpleXMLParser::~SimpleXMLParser() +{ + XML_ParserFree( m_aParser ); +} + +void SimpleXMLParser::StartElementHandler( + void *userData, const XML_Char *name, const XML_Char **atts ) +{ + static_cast<SimpleXMLParser *>(userData)->StartElement( name, atts ); +} + +void SimpleXMLParser::EndElementHandler( + void *userData, const XML_Char * /*name*/ ) +{ + static_cast<SimpleXMLParser *>(userData)->EndElement(); +} + +void SimpleXMLParser::CharacterDataHandler( + void *userData, const XML_Char *s, int 
len ) +{ + static_cast<SimpleXMLParser *>(userData)->CharacterData( s, len ); +} + +void SimpleXMLParser::CommentHandler( + void *userData, const XML_Char *data ) +{ + static_cast<SimpleXMLParser *>(userData)->Comment( data ); +} + +void SimpleXMLParser::DefaultHandler( + void *userData, const XML_Char *s, int len ) +{ + static_cast<SimpleXMLParser *>(userData)->Default( s, len ); +} + +void SimpleXMLParser::StartElement( + const XML_Char *name, const XML_Char **atts ) +{ + XMLElement *pElement = new XMLElement( OString(name), m_pCurNode ); + m_pCurNode = pElement; + m_pCurData = nullptr; + + int i = 0; + while( atts[i] ) + { + pElement->AddAttribute( atts[ i ], atts[ i + 1 ] ); + i += 2; + } +} + +void SimpleXMLParser::EndElement() +{ + m_pCurNode = m_pCurNode->GetParent(); + m_pCurData = nullptr; +} + +void SimpleXMLParser::CharacterData( const XML_Char *s, int len ) +{ + if ( !m_pCurData ) + { + OString x( s, len ); + m_pCurData = new XMLData( helper::UnQuotHTML(x) , m_pCurNode ); + } + else + { + OString x( s, len ); + m_pCurData->AddData( helper::UnQuotHTML(x) ); + + } +} + +void SimpleXMLParser::Comment( const XML_Char *data ) +{ + m_pCurData = nullptr; + new XMLComment( OString( data ), m_pCurNode ); +} + +void SimpleXMLParser::Default( const XML_Char *s, int len ) +{ + m_pCurData = nullptr; + new XMLDefault(OString( s, len ), m_pCurNode ); +} + +bool SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFile ) +{ + m_aErrorInformation.m_eCode = XML_ERROR_NONE; + m_aErrorInformation.m_nLine = 0; + m_aErrorInformation.m_nColumn = 0; + m_aErrorInformation.m_sMessage = "ERROR: Unable to open file "; + m_aErrorInformation.m_sMessage += rFileName; + + OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName)); + + oslFileHandle h; + if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read) + != osl_File_E_None) + { + return false; + } + + sal_uInt64 s; + oslFileError e = osl_getFileSize(h, &s); + void * p = nullptr; + if (e == osl_File_E_None) + { + e 
= osl_mapFile(h, &p, s, 0, 0); + } + if (e != osl_File_E_None) + { + osl_closeFile(h); + return false; + } + + pXMLFile->SetName( rFileName ); + + m_pCurNode = pXMLFile; + m_pCurData = nullptr; + + m_aErrorInformation.m_eCode = XML_ERROR_NONE; + m_aErrorInformation.m_nLine = 0; + m_aErrorInformation.m_nColumn = 0; + if ( !pXMLFile->GetName().isEmpty()) + { + m_aErrorInformation.m_sMessage = "File " + pXMLFile->GetName() + " parsed successfully"; + } + else + m_aErrorInformation.m_sMessage = "XML-File parsed successfully"; + + bool result = XML_Parse(m_aParser, static_cast< char * >(p), s, true); + if (!result) + { + m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser ); + m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser ); + m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser ); + + m_aErrorInformation.m_sMessage = "ERROR: "; + if ( !pXMLFile->GetName().isEmpty()) + m_aErrorInformation.m_sMessage += pXMLFile->GetName(); + else + m_aErrorInformation.m_sMessage += "XML-File ("; + + m_aErrorInformation.m_sMessage += + OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," + + OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): "; + + switch (m_aErrorInformation.m_eCode) + { + case XML_ERROR_NO_MEMORY: + m_aErrorInformation.m_sMessage += "No memory"; + break; + case XML_ERROR_SYNTAX: + m_aErrorInformation.m_sMessage += "Syntax"; + break; + case XML_ERROR_NO_ELEMENTS: + m_aErrorInformation.m_sMessage += "No elements"; + break; + case XML_ERROR_INVALID_TOKEN: + m_aErrorInformation.m_sMessage += "Invalid token"; + break; + case XML_ERROR_UNCLOSED_TOKEN: + m_aErrorInformation.m_sMessage += "Unclosed token"; + break; + case XML_ERROR_PARTIAL_CHAR: + m_aErrorInformation.m_sMessage += "Partial char"; + break; + case XML_ERROR_TAG_MISMATCH: + m_aErrorInformation.m_sMessage += "Tag mismatch"; + break; + case XML_ERROR_DUPLICATE_ATTRIBUTE: + m_aErrorInformation.m_sMessage += 
"Duplicated attribute"; + break; + case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: + m_aErrorInformation.m_sMessage += "Junk after doc element"; + break; + case XML_ERROR_PARAM_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Param entity ref"; + break; + case XML_ERROR_UNDEFINED_ENTITY: + m_aErrorInformation.m_sMessage += "Undefined entity"; + break; + case XML_ERROR_RECURSIVE_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Recursive entity ref"; + break; + case XML_ERROR_ASYNC_ENTITY: + m_aErrorInformation.m_sMessage += "Async_entity"; + break; + case XML_ERROR_BAD_CHAR_REF: + m_aErrorInformation.m_sMessage += "Bad char ref"; + break; + case XML_ERROR_BINARY_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Binary entity"; + break; + case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: + m_aErrorInformation.m_sMessage += "Attribute external entity ref"; + break; + case XML_ERROR_MISPLACED_XML_PI: + m_aErrorInformation.m_sMessage += "Misplaced xml pi"; + break; + case XML_ERROR_UNKNOWN_ENCODING: + m_aErrorInformation.m_sMessage += "Unknown encoding"; + break; + case XML_ERROR_INCORRECT_ENCODING: + m_aErrorInformation.m_sMessage += "Incorrect encoding"; + break; + case XML_ERROR_UNCLOSED_CDATA_SECTION: + m_aErrorInformation.m_sMessage += "Unclosed cdata section"; + break; + case XML_ERROR_EXTERNAL_ENTITY_HANDLING: + m_aErrorInformation.m_sMessage += "External entity handling"; + break; + case XML_ERROR_NOT_STANDALONE: + m_aErrorInformation.m_sMessage += "Not standalone"; + break; + case XML_ERROR_NONE: + break; + default: + break; + } + } + + osl_unmapMappedFile(h, p, s); + osl_closeFile(h); + + return result; +} + +namespace +{ + +icu::UnicodeString lcl_QuotRange( + const icu::UnicodeString& rString, const sal_Int32 nStart, + const sal_Int32 nEnd, bool bInsideTag = false ) +{ + icu::UnicodeString sReturn; + assert( nStart < nEnd ); + assert( nStart >= 0 ); + assert( nEnd <= rString.length() ); + for (sal_Int32 i = nStart; i < nEnd; ++i) + { + switch (rString[i]) + { + case '<': + 
sReturn.append("<"); + break; + case '>': + sReturn.append(">"); + break; + case '"': + if( !bInsideTag ) + sReturn.append("""); + else + sReturn.append(rString[i]); + break; + case '&': + if (rString.startsWith("&", i, 5)) + sReturn.append('&'); + else + sReturn.append("&"); + break; + default: + sReturn.append(rString[i]); + break; + } + } + return sReturn; +} + +bool lcl_isTag( const icu::UnicodeString& rString ) +{ + static const int nSize = 20; + static const icu::UnicodeString vTags[nSize] = { + "ahelp", "link", "item", "emph", "defaultinline", + "switchinline", "caseinline", "variable", + "bookmark_value", "image", "object", + "embedvar", "alt", "sup", "sub", + "menuitem", "keycode", "input", "literal", "widget" + }; + + for( int nIndex = 0; nIndex < nSize; ++nIndex ) + { + if( rString.startsWith("<" + vTags[nIndex]) || + rString == "</" + vTags[nIndex] + ">" ) + return true; + } + + return rString == "<br/>" || rString =="<help-id-missing/>"; +} + +} /// anonymous namespace + +OString XMLUtil::QuotHTML( const OString &rString ) +{ + if( rString.trim().isEmpty() ) + return rString; + UErrorCode nIcuErr = U_ZERO_ERROR; + static const sal_uInt32 nSearchFlags = + UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE; + static const icu::UnicodeString sSearchPat( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" ); + + const OUString sOUSource = OStringToOUString(rString, RTL_TEXTENCODING_UTF8); + icu::UnicodeString sSource( + reinterpret_cast<const UChar*>( + sOUSource.getStr()), sOUSource.getLength() ); + + icu::RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr ); + aRegexMatcher.reset( sSource ); + + icu::UnicodeString sReturn; + int32_t nEndPos = 0; + int32_t nStartPos = 0; + while( aRegexMatcher.find(nStartPos, nIcuErr) && U_SUCCESS(nIcuErr) ) + { + nStartPos = aRegexMatcher.start(nIcuErr); + if ( nEndPos < nStartPos ) + sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos)); + nEndPos = aRegexMatcher.end(nIcuErr); + icu::UnicodeString sMatch = 
aRegexMatcher.group(nIcuErr); + if( lcl_isTag(sMatch) ) + { + sReturn.append("<"); + sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true)); + sReturn.append(">"); + } + else + sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos)); + nStartPos = nEndPos; + } + if( nEndPos < sSource.length() ) + sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length())); + sReturn.append('\0'); + return + OUStringToOString( + OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer())), + RTL_TEXTENCODING_UTF8); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/xrm_yy_wrapper.c b/l10ntools/source/xrm_yy_wrapper.c new file mode 100644 index 000000000..36f902e0f --- /dev/null +++ b/l10ntools/source/xrm_yy_wrapper.c @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +// Helper to suppress warnings in lex generated c code, see #i57362# +#include "xrm_yy.c" + +void (*avoid_unused_yyunput_in_xrm_yy_c)() = yyunput; +int (*avoid_unused_yy_flex_strlen_in_xrm_yy_c)() = yy_flex_strlen; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/l10ntools/source/xrmlex.l b/l10ntools/source/xrmlex.l new file mode 100644 index 000000000..0644a5bc2 --- /dev/null +++ b/l10ntools/source/xrmlex.l @@ -0,0 +1,218 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +%{ + +/* + * lexer for parsing xml-property source files (*.xml) + */ + +#include <sal/config.h> + +/* enlarge token buffer to tokenize whole strings */ +#undef YYLMAX +#define YYLMAX 64000 + +/* to enable debug output define LEXDEBUG */ +#define LEXDEBUG 1 +#ifdef LEXDEBUG +#define OUTPUT fprintf +#else +#define OUTPUT(Par1,Par2); +#endif + +/* table of possible token ids */ +#include <tokens.h> +#include <xrmlex.hxx> +#include <stdlib.h> +#include <stdio.h> + +#include <sal/main.h> + +#define YY_NO_UNISTD_H + +static int bText=0; +%} + +%option yylineno +%option nounput +%option never-interactive + +%p 24000 +%e 1200 +%n 500 + +%% + +"<p "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</p>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} + +"<h1 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h1>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h2 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h2>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h3 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h3>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h4 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h4>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} +"<h5 "[^\>]*xml:lang[^\>]*\> { + WorkOnTokenSet( XRM_TEXT_START , yytext ); +} + +"</h5>" { + WorkOnTokenSet( XRM_TEXT_END, yytext ); +} + +"<display-name>" { + WorkOnTokenSet( DESC_DISPLAY_NAME_START , yytext ); +} + +"</display-name>" { + WorkOnTokenSet( DESC_DISPLAY_NAME_END, yytext ); +} + +"<name "[^\>]*lang[^\>]*\> { + WorkOnTokenSet( DESC_TEXT_START , yytext ); +} + +"</name>" { + WorkOnTokenSet( DESC_TEXT_END, yytext ); +} + +"<extension-description>" { + WorkOnTokenSet( DESC_EXTENSION_DESCRIPTION_START , yytext ); +} + +"</extension-description>" { + WorkOnTokenSet( DESC_EXTENSION_DESCRIPTION_END , yytext ); +} + +"<src "[^\>]*lang[^\>]*\> 
{ + WorkOnTokenSet( DESC_EXTENSION_DESCRIPTION_SRC , yytext ); +} + + + +"<!--" { + int c1 = 0, c2 = 0; + int c3 = yyinput(); + char pChar[2]; + pChar[1] = 0x00; + pChar[0] = c3; + + WorkOnTokenSet( COMMENT, yytext ); + WorkOnTokenSet( COMMENT, pChar ); + + for(;;) { + if ( c3 == EOF ) + break; + if ( c1 == '-' && c2 == '-' && c3 == '>' ) + break; + c1 = c2; + c2 = c3; + c3 = yyinput(); + pChar[0] = c3; + WorkOnTokenSet( COMMENT, pChar ); + } +} + +.|\n { + if ( bText == 1 ) + WorkOnTokenSet( XML_TEXTCHAR, yytext ); + else + WorkOnTokenSet( UNKNOWNCHAR, yytext ); +} + + +%% + +/*****************************************************************************/ +int yywrap(void) +/*****************************************************************************/ +{ + return 1; +} + +/*****************************************************************************/ +void yyerror ( const char *s ) +/*****************************************************************************/ +{ + /* write error to stderr */ + fprintf( stderr, + "Error: \"%s\" in line %d: \"%s\"\n", s, yylineno, yytext ); + SetError(); +} + +SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv) { + /* error level */ + int nRetValue = 0; + FILE *pFile; + + if ( !GetOutputFile( argc, argv ) ) + { + return 1; + } + pFile = GetXrmFile(); + InitXrmExport( getFilename() ); + + if ( !pFile ) + return 1; + + yyin = pFile; + + /* create global instance of class XmlExport */ + //InitXrmExport( pOutput ); + + /* start parser */ + yylex(); + + /* get error info. 
and end export */ + nRetValue = GetError(); + EndXrmExport(); + + /* return error level */ + return nRetValue; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/l10ntools/source/xrmmerge.cxx b/l10ntools/source/xrmmerge.cxx new file mode 100644 index 000000000..b77e0138e --- /dev/null +++ b/l10ntools/source/xrmmerge.cxx @@ -0,0 +1,496 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <cstring> + +#include <stdio.h> + +#include <common.hxx> +#include <export.hxx> +#include <po.hxx> +#include <xrmlex.hxx> +#include <xrmmerge.hxx> +#include <tokens.h> +#include <helper.hxx> +#include <iostream> +#include <fstream> +#include <vector> +#include <memory> + +using namespace std; + +// set of global variables +static bool bMergeMode; +static bool bDisplayName; +static bool bExtensionDescription; +static OString sLanguage; +static OString sInputFileName; +static OString sOutputFile; +static OString sMergeSrc; +static OString sLangAttribute; +static OString sResourceType; +static XRMResParser *pParser = nullptr; + +extern "C" { +// the whole interface to lexer is in this extern "C" section + +extern bool GetOutputFile( int argc, char* argv[]) +{ + bDisplayName = false; + bExtensionDescription = false; + + common::HandledArgs aArgs; + if ( common::handleArguments(argc, argv, aArgs) ) + { + bMergeMode = aArgs.m_bMergeMode; + sLanguage = aArgs.m_sLanguage; + sInputFileName = aArgs.m_sInputFile; + sOutputFile = aArgs.m_sOutputFile; + sMergeSrc = aArgs.m_sMergeSrc; + return true; + } + else + { + // command line is not valid + common::writeUsage("xrmex","*.xrm/*.xml"); + return false; + } +} + +int InitXrmExport( const char* pFilename) +{ + // instantiate Export + OString sFilename( pFilename ); + + if ( bMergeMode ) + pParser = new XRMResMerge( sMergeSrc, sOutputFile, sFilename ); + else if (!sOutputFile.isEmpty()) + pParser = new XRMResExport( sOutputFile, sInputFileName ); + + return 1; +} + +int EndXrmExport() +{ + delete pParser; + return 1; +} +extern const char* getFilename() +{ + return sInputFileName.getStr(); +} + +extern FILE *GetXrmFile() +{ + // look for valid filename + if (!sInputFileName.isEmpty()) { + //TODO: explicit BOM handling? 
+ FILE * pFile = fopen(sInputFileName.getStr(), "r"); + if ( !pFile ){ + fprintf( stderr, "Error: Could not open file %s\n", + sInputFileName.getStr()); + } + else { + return pFile; + } + } + // this means the file could not be opened + return nullptr; +} + +int WorkOnTokenSet( int nTyp, char *pTokenText ) +{ + //printf("Typ = %d , text = '%s'\n",nTyp , pTokenText ); + pParser->Execute( nTyp, pTokenText ); + + return 1; +} + +int SetError() +{ + pParser->SetError(); + return 1; +} +} + +extern "C" { + +int GetError() +{ + return pParser->GetError(); +} +} + + + + +XRMResParser::XRMResParser() + : bError( false ), + bText( false ) +{ +} + +XRMResParser::~XRMResParser() +{ +} + +void XRMResParser::Execute( int nToken, char * pToken ) +{ + OString rToken( pToken ); + + switch ( nToken ) { + case XRM_TEXT_START:{ + OString sNewGID = GetAttribute( rToken, "id" ); + if ( sNewGID != sGID ) { + sGID = sNewGID; + } + bText = true; + sCurrentText = OString(); + sCurrentOpenTag = rToken; + Output( rToken ); + } + break; + + case XRM_TEXT_END: { + sCurrentCloseTag = rToken; + sResourceType = OString ( "readmeitem" ); + sLangAttribute = OString ( "xml:lang" ); + WorkOnText( sCurrentOpenTag, sCurrentText ); + Output( sCurrentText ); + EndOfText( sCurrentOpenTag, sCurrentCloseTag ); + bText = false; + rToken = OString(); + sCurrentText = OString(); + } + break; + + case DESC_DISPLAY_NAME_START:{ + bDisplayName = true; + } + break; + + case DESC_DISPLAY_NAME_END:{ + bDisplayName = false; + } + break; + + case DESC_TEXT_START:{ + if (bDisplayName) { + sGID = OString("dispname"); + bText = true; + sCurrentText = OString(); + sCurrentOpenTag = rToken; + Output( rToken ); + } + } + break; + + case DESC_TEXT_END: { + if (bDisplayName) { + sCurrentCloseTag = rToken; + sResourceType = OString ( "description" ); + sLangAttribute = OString ( "lang" ); + WorkOnText( sCurrentOpenTag, sCurrentText ); + Output( sCurrentText ); + EndOfText( sCurrentOpenTag, sCurrentCloseTag ); + bText = false; 
+ rToken = OString(); + sCurrentText = OString(); + } + } + break; + + case DESC_EXTENSION_DESCRIPTION_START: { + bExtensionDescription = true; + } + break; + + case DESC_EXTENSION_DESCRIPTION_END: { + bExtensionDescription = false; + } + break; + + case DESC_EXTENSION_DESCRIPTION_SRC: { + if (bExtensionDescription) { + sGID = OString("extdesc"); + sResourceType = OString ( "description" ); + sLangAttribute = OString ( "lang" ); + sCurrentOpenTag = rToken; + sCurrentText = OString(); + Output( rToken ); + WorkOnDesc( sCurrentOpenTag, sCurrentText ); + sCurrentCloseTag = rToken; + Output( sCurrentText ); + rToken = OString(); + sCurrentText = OString(); + } + } + break; + + default: + if ( bText ) { + sCurrentText += rToken; + } + break; + } + + if ( !bText ) + { + Output( rToken ); + } +} + +OString XRMResParser::GetAttribute( const OString &rToken, const OString &rAttribute ) +{ + const OString sSearch{ " " + rAttribute + "=" }; + OString sTmp{ rToken.replace('\t', ' ') }; + sal_Int32 nPos = sTmp.indexOf( sSearch ); + + if ( nPos<0 ) + return OString(); + + return sTmp.getToken(1, '"', nPos); +} + + +void XRMResParser::Error( const OString &rError ) +{ + yyerror(rError.getStr()); +} + + + + +XRMResExport::XRMResExport( + const OString &rOutputFile, const OString &rFilePath ) + : XRMResParser(), + sPath( rFilePath ) +{ + pOutputStream.open( rOutputFile, PoOfstream::APP ); + if (!pOutputStream.isOpen()) + { + Error( "Unable to open output file: " + rOutputFile ); + } +} + +XRMResExport::~XRMResExport() +{ + pOutputStream.close(); +} + +void XRMResExport::Output( const OString& ) {} + +void XRMResExport::WorkOnDesc( + const OString &rOpenTag, + OString &rText ) +{ + const OString sDescFileName{ sInputFileName.replaceAll("description.xml", OString()) + + GetAttribute( rOpenTag, "xlink:href" ) }; + ifstream file (sDescFileName.getStr(), ios::in|ios::binary|ios::ate); + if (file.is_open()) { + int size = static_cast<int>(file.tellg()); + std::unique_ptr<char[]> 
memblock(new char [size+1]); + file.seekg (0, ios::beg); + file.read (memblock.get(), size); + file.close(); + memblock[size] = '\0'; + rText = OString(memblock.get()); + } + WorkOnText( rOpenTag, rText ); + EndOfText( rOpenTag, rOpenTag ); +} + +void XRMResExport::WorkOnText( + const OString &rOpenTag, + OString &rText ) +{ + OString sLang( GetAttribute( rOpenTag, sLangAttribute )); + + if ( !pResData ) + { + pResData.reset( new ResData( GetGID() ) ); + } + pResData->sText[sLang] = rText; +} + +void XRMResExport::EndOfText( + const OString &, + const OString & ) +{ + if ( pResData ) + { + OString sAct = pResData->sText["en-US"]; + + if( !sAct.isEmpty() ) + common::writePoEntry( + "Xrmex", pOutputStream, sPath, sResourceType, + pResData->sGId, OString(), OString(), sAct ); + } + pResData.reset(); +} + + + + +XRMResMerge::XRMResMerge( + const OString &rMergeSource, const OString &rOutputFile, + const OString &rFilename ) + : XRMResParser(), + sFilename( rFilename ) +{ + if (!rMergeSource.isEmpty() && sLanguage.equalsIgnoreAsciiCase("ALL")) + { + pMergeDataFile.reset(new MergeDataFile( + rMergeSource, sInputFileName, false)); + aLanguages = pMergeDataFile->GetLanguages(); + } + else + aLanguages.push_back( sLanguage ); + pOutputStream.open( + rOutputFile.getStr(), std::ios_base::out | std::ios_base::trunc); + if (!pOutputStream.is_open()) { + Error( "Unable to open output file: " + rOutputFile ); + } +} + +XRMResMerge::~XRMResMerge() +{ + pOutputStream.close(); +} + +void XRMResMerge::WorkOnDesc( + const OString &rOpenTag, + OString &rText ) +{ + WorkOnText( rOpenTag, rText); + if ( pMergeDataFile && pResData ) { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrys( pResData.get() ); + if ( pEntrys ) { + OString sCur; + OString sDescFilename = GetAttribute ( rOpenTag, "xlink:href" ); + for( size_t n = 0; n < aLanguages.size(); n++ ){ + sCur = aLanguages[ n ]; + OString sText; + if ( !sCur.equalsIgnoreAsciiCase("en-US") && + ( pEntrys->GetText( sText, sCur, true )) 
&& + !sText.isEmpty()) + { + OString sAdditionalLine{ "\n " + rOpenTag }; + OString sSearch{ sLangAttribute + "=\"" }; + OString sReplace( sSearch ); + + sSearch += GetAttribute( rOpenTag, sLangAttribute ); + sReplace += sCur; + sAdditionalLine = sAdditionalLine.replaceFirst( + sSearch, sReplace); + + sSearch = OString("xlink:href=\""); + sReplace = sSearch; + + const OString sLocDescFilename = sDescFilename.replaceFirst( "en-US", sCur); + + sSearch += sDescFilename; + sReplace += sLocDescFilename; + sAdditionalLine = sAdditionalLine.replaceFirst( + sSearch, sReplace); + + Output( sAdditionalLine ); + + sal_Int32 i = sOutputFile.lastIndexOf('/'); + if (i == -1) { + std::cerr + << "Error: output file " << sOutputFile + << " does not contain any /\n"; + throw false; //TODO + } + OString sOutputDescFile( + sOutputFile.copy(0, i + 1) + sLocDescFilename); + ofstream file(sOutputDescFile.getStr()); + if (file.is_open()) { + file << sText; + file.close(); + } else { + std::cerr + << "Error: cannot write " + << sOutputDescFile << '\n'; + throw false; //TODO + } + } + } + } + } + pResData.reset(); +} + +void XRMResMerge::WorkOnText( + const OString &, + OString & ) +{ + if ( pMergeDataFile && !pResData ) { + pResData.reset( new ResData( GetGID(), sFilename ) ); + pResData->sResTyp = sResourceType; + } +} + +void XRMResMerge::Output( const OString& rOutput ) +{ + if (!rOutput.isEmpty()) + pOutputStream << rOutput; +} + +void XRMResMerge::EndOfText( + const OString &rOpenTag, + const OString &rCloseTag ) +{ + + Output( rCloseTag ); + if ( pMergeDataFile && pResData ) { + MergeEntrys *pEntrys = pMergeDataFile->GetMergeEntrys( pResData.get() ); + if ( pEntrys ) { + OString sCur; + for( size_t n = 0; n < aLanguages.size(); n++ ){ + sCur = aLanguages[ n ]; + OString sContent; + if (!sCur.equalsIgnoreAsciiCase("en-US") && + ( pEntrys->GetText( sContent, sCur, true )) && + !sContent.isEmpty() && + helper::isWellFormedXML( sContent )) + { + const OString& sText( sContent ); + 
OString sAdditionalLine{ "\n " + rOpenTag }; + OString sSearch{ sLangAttribute + "=\"" }; + OString sReplace( sSearch ); + + sSearch += GetAttribute( rOpenTag, sLangAttribute ); + sReplace += sCur; + + sAdditionalLine = sAdditionalLine.replaceFirst( + sSearch, sReplace) + sText + rCloseTag; + + Output( sAdditionalLine ); + } + } + } + } + pResData.reset(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |