summaryrefslogtreecommitdiffstats
path: root/lingucomponent/source/languageguessing/simpleguesser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'lingucomponent/source/languageguessing/simpleguesser.cxx')
-rw-r--r--lingucomponent/source/languageguessing/simpleguesser.cxx221
1 files changed, 221 insertions, 0 deletions
diff --git a/lingucomponent/source/languageguessing/simpleguesser.cxx b/lingucomponent/source/languageguessing/simpleguesser.cxx
new file mode 100644
index 000000000..7210b1f45
--- /dev/null
+++ b/lingucomponent/source/languageguessing/simpleguesser.cxx
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+ /**
+ *
+ *
+ *
+ *
+ * TODO
+ * - Throw an exception when h == NULL
+ * - h is not initialised to a usable handle when the default constructor runs
+ */
+
+#include <string.h>
+
+#ifdef SYSTEM_LIBEXTTEXTCAT
+#include <libexttextcat/textcat.h>
+#include <libexttextcat/common.h>
+#include <libexttextcat/constants.h>
+#include <libexttextcat/fingerprint.h>
+#else
+#include <textcat.h>
+#include <common.h>
+#include <constants.h>
+#include <fingerprint.h>
+#endif
+
+#include <sal/types.h>
+
+#include<rtl/character.hxx>
+#include "simpleguesser.hxx"
+
+/**
+ * Compares the common leading part of two strings, ignoring ASCII case.
+ *
+ * Only the first min(s1.length(), s2.length()) characters are examined, and
+ * the scan also stops at an embedded NUL in either string. A '.' in either
+ * string matches any character at that position.
+ *
+ * @return 0 when no differing position was found, otherwise the difference
+ *         of the upper-cased characters at the first mismatch.
+ */
+static int startsAsciiCaseInsensitive(const std::string &s1, const std::string &s2){
+    const size_t limit = s1.length() < s2.length() ? s1.length() : s2.length();
+
+    for (size_t pos = 0; pos < limit && s2[pos] && s1[pos]; ++pos)
+    {
+        // '.' is a neutral character: it never counts as a mismatch
+        if (s1[pos] == '.' || s2[pos] == '.')
+            continue;
+
+        const int diff = rtl::toAsciiUpperCase(static_cast<unsigned char>(s1[pos]))
+                       - rtl::toAsciiUpperCase(static_cast<unsigned char>(s2[pos]));
+        if (diff != 0)
+            return diff;
+    }
+    return 0;
+}
+
+namespace {
+
+/**
+ * Local copy of the handle structure from textcat.c.
+ *
+ * The opaque handle h is cast to this type in order to reach the fingerprint
+ * table and the per-fingerprint enable/disable flags.
+ * NOTE(review): presumably the member order and types must stay in sync with
+ * the definition inside the textcat library - confirm when updating it.
+ */
+struct textcat_t
+{
+    void **fprint;          // fingerprint handles, passed to fp_Name()
+    char *fprint_disable;   // one flag byte per fingerprint, tested against a mask
+    uint4 size;             // number of entries iterated in fprint / fprint_disable
+    uint4 maxsize;
+
+    char output[MAXOUTPUTSIZE];
+};
+
+}
+
+/**
+ * Creates a guesser without a textcat handle; the handle is created later
+ * by SetDBPath().
+ */
+SimpleGuesser::SimpleGuesser()
+    : h(nullptr)
+{
+}
+
+/**
+ * Releases the handle currently held (if any) and takes over the handle
+ * pointer of sg. Self-assignment is a no-op.
+ *
+ * NOTE(review): the handle pointer is copied, not duplicated, so afterwards
+ * both objects refer to the same handle and each destructor will pass it to
+ * textcat_Done(). Confirm that callers never let both objects be destroyed
+ * while sharing a handle.
+ */
+SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
+    if (this != &sg)
+    {
+        if (h)
+            textcat_Done(h);
+        h = sg.h;
+    }
+    return *this;
+}
+
+/**
+ * Hands the textcat handle, when one is held, back to textcat_Done().
+ */
+SimpleGuesser::~SimpleGuesser()
+{
+    if (h != nullptr)
+    {
+        textcat_Done(h);
+    }
+}
+
+/*!
+    \fn SimpleGuesser::GuessLanguage(const char* text)
+
+    Classifies at most MAX_STRING_LENGTH_TO_ANALYSE bytes of \a text and
+    returns one Guess per GUESS_SEPARATOR_OPEN found in the classifier result.
+
+    \param text NUL-terminated text to analyse; a null pointer yields no guesses.
+    \return the guesses in the order reported by textcat_Classify(); empty when
+            no handle is set, when \a text is null, or when the classifier
+            reports TEXTCAT_RESULT_SHORT_STR.
+ */
+std::vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
+{
+    std::vector<Guess> guesses;
+
+    if (!h || !text)
+        return guesses;
+
+    // Clamp while the length is still a size_t: converting strlen() to int
+    // first could yield a negative value for very long input and bypass the
+    // upper bound.
+    const size_t fullLength = strlen(text);
+    const int len = fullLength > static_cast<size_t>(MAX_STRING_LENGTH_TO_ANALYSE)
+        ? MAX_STRING_LENGTH_TO_ANALYSE
+        : static_cast<int>(fullLength);
+
+    const char *guess_list = textcat_Classify(h, text, len);
+
+    // Defensive: strcmp() on a null pointer would be undefined behaviour.
+    if (!guess_list || strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
+        return guesses;
+
+    // Every opening separator starts one guess; Guess is constructed from the
+    // remainder of the list beginning at that separator.
+    for (const char *entry = strchr(guess_list, GUESS_SEPARATOR_OPEN);
+         entry != nullptr;
+         entry = strchr(entry + 1, GUESS_SEPARATOR_OPEN))
+    {
+        guesses.emplace_back(entry);
+    }
+
+    return guesses;
+}
+
+/**
+ * Returns the first guess produced by GuessLanguage() for text, or a
+ * default-constructed Guess when there is none.
+ */
+Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
+{
+    const std::vector<Guess> candidates = GuessLanguage(text);
+    if (candidates.empty())
+        return Guess();
+    return candidates.front();
+}
+/**
+ * Lists the languages whose enable/disable flag has at least one bit in
+ * common with mask:
+ *   mask = 0xF0 returns only the available languages
+ *   mask = 0x0F returns only the unavailable languages
+ *   mask = 0xFF returns both available and unavailable languages
+ *
+ * Returns an empty list when no textcat handle is set.
+ */
+std::vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
+{
+    std::vector<Guess> lang;
+    if (!h)
+        return lang;
+
+    textcat_t *tables = static_cast<textcat_t*>(h);
+
+    for (size_t idx = 0; idx < tables->size; ++idx)
+    {
+        if (!(tables->fprint_disable[idx] & mask))
+            continue;
+
+        // The fingerprint name is prefixed with "[" before being handed to Guess.
+        std::string langStr("[");
+        langStr.append(fp_Name(tables->fprint[idx]));
+        lang.push_back(Guess(langStr.c_str()));
+    }
+
+    return lang;
+}
+
+/**
+ * Returns the managed languages selected by mask 0xF0 (the available ones).
+ */
+std::vector<Guess> SimpleGuesser::GetAvailableLanguages()
+{
+    const char availableMask = sal::static_int_cast< char >( 0xF0 );
+    return GetManagedLanguages(availableMask);
+}
+
+/**
+ * Returns the managed languages selected by mask 0x0F (the unavailable ones).
+ */
+std::vector<Guess> SimpleGuesser::GetUnavailableLanguages()
+{
+    const char unavailableMask = sal::static_int_cast< char >( 0x0F );
+    return GetManagedLanguages(unavailableMask);
+}
+
+/**
+ * Returns the managed languages selected by mask 0xFF (available and
+ * unavailable alike).
+ */
+std::vector<Guess> SimpleGuesser::GetAllManagedLanguages()
+{
+    const char everyMask = sal::static_int_cast< char >( 0xFF );
+    return GetManagedLanguages(everyMask);
+}
+
+/**
+ * Overwrites the enable/disable flag of every fingerprint whose name matches
+ * lang according to startsAsciiCaseInsensitive() with mask.
+ * Does nothing when no textcat handle is set.
+ */
+void SimpleGuesser::XableLanguage(const std::string& lang, char mask)
+{
+    if (!h)
+        return;
+
+    textcat_t *tables = static_cast<textcat_t*>(h);
+
+    for (size_t idx = 0; idx < tables->size; ++idx)
+    {
+        const std::string language(fp_Name(tables->fprint[idx]));
+        if (startsAsciiCaseInsensitive(language, lang) != 0)
+            continue;
+
+        tables->fprint_disable[idx] = mask;
+    }
+}
+
+/**
+ * Sets the flag of every fingerprint matching lang to 0xF0 (available).
+ */
+void SimpleGuesser::EnableLanguage(const std::string& lang)
+{
+    const char enabledFlag = sal::static_int_cast< char >( 0xF0 );
+    XableLanguage(lang, enabledFlag);
+}
+
+/**
+ * Sets the flag of every fingerprint matching lang to 0x0F (unavailable).
+ */
+void SimpleGuesser::DisableLanguage(const std::string& lang)
+{
+    const char disabledFlag = sal::static_int_cast< char >( 0x0F );
+    XableLanguage(lang, disabledFlag);
+}
+
+/**
+ * Replaces the textcat handle: a handle already held is passed to
+ * textcat_Done(), then a new one is obtained from
+ * special_textcat_Init(path, prefix).
+ */
+void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
+{
+    if (h != nullptr)
+    {
+        textcat_Done(h);
+    }
+    h = special_textcat_Init(path, prefix);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */