From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 19:32:43 +0200
Subject: Adding upstream version 1:115.7.0.

Signed-off-by: Daniel Baumann
---
 dom/media/webspeech/synth/windows/SapiService.cpp | 445 ++++++++++++++++++++++
 dom/media/webspeech/synth/windows/SapiService.h   |  57 +++
 dom/media/webspeech/synth/windows/components.conf |  17 +
 dom/media/webspeech/synth/windows/moz.build       |  17 +
 4 files changed, 536 insertions(+)
 create mode 100644 dom/media/webspeech/synth/windows/SapiService.cpp
 create mode 100644 dom/media/webspeech/synth/windows/SapiService.h
 create mode 100644 dom/media/webspeech/synth/windows/components.conf
 create mode 100644 dom/media/webspeech/synth/windows/moz.build

diff --git a/dom/media/webspeech/synth/windows/SapiService.cpp b/dom/media/webspeech/synth/windows/SapiService.cpp
new file mode 100644
index 0000000000..f1e44213d1
--- /dev/null
+++ b/dom/media/webspeech/synth/windows/SapiService.cpp
@@ -0,0 +1,445 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "SapiService.h"
+#include "nsServiceManagerUtils.h"
+#include "nsEscape.h"
+#include "nsXULAppAPI.h"
+
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/StaticPrefs_media.h"
+
+namespace mozilla::dom {
+
+constexpr static WCHAR kSpCategoryOneCoreVoices[] =
+    L"HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices";
+
+StaticRefPtr<SapiService> SapiService::sSingleton;
+
+class SapiCallback final : public nsISpeechTaskCallback {
+ public:
+  SapiCallback(nsISpeechTask* aTask, ISpVoice* aSapiClient,
+               uint32_t aTextOffset, uint32_t aSpeakTextLen)
+      : mTask(aTask),
+        mSapiClient(aSapiClient),
+        mTextOffset(aTextOffset),
+        mSpeakTextLen(aSpeakTextLen),
+        mCurrentIndex(0),
+        mStreamNum(0) {
+    mStartingTime = TimeStamp::Now();
+  }
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SapiCallback, nsISpeechTaskCallback)
+
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  ULONG GetStreamNum() const { return mStreamNum; }
+  void SetStreamNum(ULONG aValue) { mStreamNum = aValue; }
+
+  void OnSpeechEvent(const SPEVENT& speechEvent);
+
+ private:
+  ~SapiCallback() {}
+
+  float GetTimeDurationFromStart() const {
+    TimeDuration duration = TimeStamp::Now() - mStartingTime;
+    return duration.ToSeconds();
+  }
+
+  // This pointer is used to dispatch events
+  nsCOMPtr<nsISpeechTask> mTask;
+  RefPtr<ISpVoice> mSapiClient;
+
+  uint32_t mTextOffset;
+  uint32_t mSpeakTextLen;
+
+  // Used for calculating the time taken to speak the utterance
+  TimeStamp mStartingTime;
+  uint32_t mCurrentIndex;
+
+  ULONG mStreamNum;
+};
+
+NS_IMPL_CYCLE_COLLECTION(SapiCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SapiCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SapiCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SapiCallback)
+
+NS_IMETHODIMP
+SapiCallback::OnPause() {
+  if (FAILED(mSapiClient->Pause())) {
+    return NS_ERROR_FAILURE;
+  }
+  if (!mTask) {
+    // When calling pause() on the child process, it may not have received the
+    // end event from the chrome process yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiCallback::OnResume() {
+  if (FAILED(mSapiClient->Resume())) {
+    return NS_ERROR_FAILURE;
+  }
+  if (!mTask) {
+    // When calling resume() on the child process, it may not have received the
+    // end event from the chrome process yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiCallback::OnCancel() {
+  // After cancel, mCurrentIndex may be updated.
+  // In the cancel case, use mCurrentIndex for DispatchEnd.
+  mSpeakTextLen = 0;
+  // Purge all the previous utterances and speak an empty string
+  if (FAILED(mSapiClient->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr))) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiCallback::OnVolumeChanged(float aVolume) {
+  mSapiClient->SetVolume(static_cast<USHORT>(aVolume * 100));
+  return NS_OK;
+}
+
+void SapiCallback::OnSpeechEvent(const SPEVENT& speechEvent) {
+  switch (speechEvent.eEventId) {
+    case SPEI_START_INPUT_STREAM:
+      mTask->DispatchStart();
+      break;
+    case SPEI_END_INPUT_STREAM:
+      if (mSpeakTextLen) {
+        mCurrentIndex = mSpeakTextLen;
+      }
+      mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
+      mTask = nullptr;
+      break;
+    case SPEI_TTS_BOOKMARK:
+      mCurrentIndex = static_cast<uint32_t>(speechEvent.lParam) - mTextOffset;
+      mTask->DispatchBoundary(u"mark"_ns, GetTimeDurationFromStart(),
+                              mCurrentIndex, 0, 0);
+      break;
+    case SPEI_WORD_BOUNDARY:
+      mCurrentIndex = static_cast<uint32_t>(speechEvent.lParam) - mTextOffset;
+      mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(),
+                              mCurrentIndex,
+                              static_cast<uint32_t>(speechEvent.wParam), 1);
+      break;
+    case SPEI_SENTENCE_BOUNDARY:
+      mCurrentIndex = static_cast<uint32_t>(speechEvent.lParam) - mTextOffset;
+      mTask->DispatchBoundary(u"sentence"_ns, GetTimeDurationFromStart(),
+                              mCurrentIndex,
+                              static_cast<uint32_t>(speechEvent.wParam), 1);
+      break;
+    default:
+      break;
+  }
+}
+
+// static
+void __stdcall SapiService::SpeechEventCallback(WPARAM aWParam,
+                                                LPARAM aLParam) {
+  RefPtr<ISpVoice> spVoice = (ISpVoice*)aWParam;
+  RefPtr<SapiService> service = (SapiService*)aLParam;
+
+  SPEVENT speechEvent;
+  while (spVoice->GetEvents(1, &speechEvent, nullptr) == S_OK) {
+    for (size_t i = 0; i < service->mCallbacks.Length(); i++) {
+      RefPtr<SapiCallback> callback = service->mCallbacks[i];
+      if (callback->GetStreamNum() == speechEvent.ulStreamNum) {
+        callback->OnSpeechEvent(speechEvent);
+        if (speechEvent.eEventId == SPEI_END_INPUT_STREAM) {
+          service->mCallbacks.RemoveElementAt(i);
+        }
+        break;
+      }
+    }
+  }
+}
+
+NS_INTERFACE_MAP_BEGIN(SapiService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(SapiService)
+NS_IMPL_RELEASE(SapiService)
+
+SapiService::SapiService() : mInitialized(false) {}
+
+SapiService::~SapiService() {}
+
+bool SapiService::Init() {
+  AUTO_PROFILER_LABEL("SapiService::Init", OTHER);
+
+  MOZ_ASSERT(!mInitialized);
+
+  if (Preferences::GetBool("media.webspeech.synth.test") ||
+      !StaticPrefs::media_webspeech_synth_enabled()) {
+    // When the test pref is enabled, we shouldn't add the OS backend (Bug 1160844)
+    return false;
+  }
+
+  // Get all the voices from SAPI and register them in the SynthVoiceRegistry
+  if (!RegisterVoices()) {
+    return false;
+  }
+
+  mInitialized = true;
+  return true;
+}
+
+already_AddRefed<ISpVoice> SapiService::InitSapiInstance() {
+  RefPtr<ISpVoice> spVoice;
+  if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice,
+                              getter_AddRefs(spVoice)))) {
+    return nullptr;
+  }
+
+  // Set interest for all the events we are interested in
+  ULONGLONG eventMask = SPFEI(SPEI_START_INPUT_STREAM) |
+                        SPFEI(SPEI_TTS_BOOKMARK) | SPFEI(SPEI_WORD_BOUNDARY) |
+                        SPFEI(SPEI_SENTENCE_BOUNDARY) |
+                        SPFEI(SPEI_END_INPUT_STREAM);
+
+  if (FAILED(spVoice->SetInterest(eventMask, eventMask))) {
+    return nullptr;
+  }
+
+  // Set the callback function for receiving the events
+  spVoice->SetNotifyCallbackFunction(
+      (SPNOTIFYCALLBACK*)SapiService::SpeechEventCallback,
+      (WPARAM)spVoice.get(), (LPARAM)this);
+
+  return spVoice.forget();
+}
+
+bool SapiService::RegisterVoices() {
+  nsCOMPtr<nsISynthVoiceRegistry> registry =
+      do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID);
+  if (!registry) {
+    return false;
+  }
+  bool result = RegisterVoices(registry, kSpCategoryOneCoreVoices);
+  result |= RegisterVoices(registry, SPCAT_VOICES);
+  if (result) {
+    registry->NotifyVoicesChanged();
+  }
+  return result;
+}
+
+bool SapiService::RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry,
+                                 const WCHAR* categoryId) {
+  nsresult rv;
+
+  RefPtr<ISpObjectTokenCategory> category;
+  if (FAILED(CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL,
+                              IID_ISpObjectTokenCategory,
+                              getter_AddRefs(category)))) {
+    return false;
+  }
+  if (FAILED(category->SetId(categoryId, FALSE))) {
+    return false;
+  }
+
+  RefPtr<IEnumSpObjectTokens> voiceTokens;
+  if (FAILED(category->EnumTokens(nullptr, nullptr,
+                                  getter_AddRefs(voiceTokens)))) {
+    return false;
+  }
+
+  WCHAR locale[LOCALE_NAME_MAX_LENGTH];
+  while (true) {
+    RefPtr<ISpObjectToken> voiceToken;
+    if (voiceTokens->Next(1, getter_AddRefs(voiceToken), nullptr) != S_OK) {
+      break;
+    }
+
+    RefPtr<ISpDataKey> attributes;
+    if (FAILED(
+            voiceToken->OpenKey(L"Attributes", getter_AddRefs(attributes)))) {
+      continue;
+    }
+
+    WCHAR* language = nullptr;
+    if (FAILED(attributes->GetStringValue(L"Language", &language))) {
+      continue;
+    }
+
+    // The Language attribute is an LCID in hex, so we need to convert it to a
+    // locale name.
+    nsAutoString hexLcid;
+    LCID lcid = wcstol(language, nullptr, 16);
+    CoTaskMemFree(language);
+    if (NS_WARN_IF(
+            !LCIDToLocaleName(lcid, locale, LOCALE_NAME_MAX_LENGTH, 0))) {
+      continue;
+    }
+
+    WCHAR* description = nullptr;
+    if (FAILED(voiceToken->GetStringValue(nullptr, &description))) {
+      continue;
+    }
+
+    nsAutoString uri;
+    uri.AssignLiteral("urn:moz-tts:sapi:");
+    uri.Append(description);
+    uri.AppendLiteral("?");
+    uri.Append(locale);
+
+    // This service can only speak one utterance at a time, so we set
+    // aQueuesUtterances to true in order to track global state and schedule
+    // access to this service.
+    rv = registry->AddVoice(this, uri, nsDependentString(description),
+                            nsDependentString(locale), true, true);
+    CoTaskMemFree(description);
+    if (NS_FAILED(rv)) {
+      continue;
+    }
+
+    mVoices.InsertOrUpdate(uri, std::move(voiceToken));
+  }
+
+  return true;
+}
+
+NS_IMETHODIMP
+SapiService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume,
+                   float aRate, float aPitch, nsISpeechTask* aTask) {
+  NS_ENSURE_TRUE(mInitialized, NS_ERROR_NOT_AVAILABLE);
+
+  RefPtr<ISpObjectToken> voiceToken;
+  if (!mVoices.Get(aUri, getter_AddRefs(voiceToken))) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  RefPtr<ISpVoice> spVoice = InitSapiInstance();
+  if (!spVoice) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (FAILED(spVoice->SetVoice(voiceToken))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (FAILED(spVoice->SetVolume(static_cast<USHORT>(aVolume * 100)))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // The max supported rate in SAPI engines is 3x, and the min is 1/3x. It is
+  // expressed by an integer: 0 is the normal rate, -10 is 1/3x and 10 is 3x.
+  // Values below and above that are allowed, but the engine may clip the rate
+  // to its maximum capable value.
+  // "Each increment between -10 and +10 is logarithmically distributed such
+  // that incrementing or decrementing by 1 is multiplying or dividing the
+  // rate by the 10th root of 3"
+  // https://msdn.microsoft.com/en-us/library/ee431826(v=vs.85).aspx
+  long rate = aRate != 0 ? static_cast<long>(10 * log10(aRate) / log10(3)) : 0;
+  if (FAILED(spVoice->SetRate(rate))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Set the pitch using xml
+  nsAutoString xml;
+  xml.AssignLiteral("<pitch absmiddle=\"");
+  xml.AppendInt(static_cast<int32_t>(aPitch * 10.0f - 10.0f));
+  xml.AppendLiteral("\">");
+  uint32_t textOffset = xml.Length();
+
+  for (size_t i = 0; i < aText.Length(); i++) {
+    switch (aText[i]) {
+      case '&':
+        xml.AppendLiteral("&amp;");
+        break;
+      case '<':
+        xml.AppendLiteral("&lt;");
+        break;
+      case '>':
+        xml.AppendLiteral("&gt;");
+        break;
+      default:
+        xml.Append(aText[i]);
+        break;
+    }
+  }
+
+  xml.AppendLiteral("</pitch>");
+
+  RefPtr<SapiCallback> callback =
+      new SapiCallback(aTask, spVoice, textOffset, aText.Length());
+
+  // The last three parameters don't matter for an indirect service
+  nsresult rv = aTask->Setup(callback);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  ULONG streamNum;
+  if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) {
+    aTask->Setup(nullptr);
+    return NS_ERROR_FAILURE;
+  }
+
+  callback->SetStreamNum(streamNum);
+  // streamNum is reassigned the same value once the last stream has finished,
+  // even if the callback for the stream end hasn't been called yet, so we
+  // cannot key a hashtable on it and have to append to an array instead.
+  mCallbacks.AppendElement(callback);
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SapiService::Observe(nsISupports* aSubject, const char* aTopic,
+                     const char16_t* aData) {
+  return NS_OK;
+}
+
+SapiService* SapiService::GetInstance() {
+  MOZ_ASSERT(NS_IsMainThread());
+  if (XRE_GetProcessType() != GeckoProcessType_Default) {
+    MOZ_ASSERT(false, "SapiService can only be started on main gecko process");
+    return nullptr;
+  }
+
+  if (!sSingleton) {
+    RefPtr<SapiService> service = new SapiService();
+    if (service->Init()) {
+      sSingleton = service;
+      ClearOnShutdown(&sSingleton);
+    }
+  }
+  return sSingleton;
+}
+
+already_AddRefed<SapiService> SapiService::GetInstanceForService() {
+  RefPtr<SapiService> sapiService = GetInstance();
+  return sapiService.forget();
+}
+
+}  // namespace mozilla::dom
diff --git a/dom/media/webspeech/synth/windows/SapiService.h b/dom/media/webspeech/synth/windows/SapiService.h
new file mode 100644
index 0000000000..79cc20917b
--- /dev/null
+++ b/dom/media/webspeech/synth/windows/SapiService.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_SapiService_h
+#define mozilla_dom_SapiService_h
+
+#include "nsISpeechService.h"
+#include "nsIObserver.h"
+#include "nsRefPtrHashtable.h"
+#include "nsTArray.h"
+#include "mozilla/StaticPtr.h"
+
+#include <windows.h>
+#include <sapi.h>
+
+class nsISynthVoiceRegistry;
+
+namespace mozilla::dom {
+
+class SapiCallback;
+
+class SapiService final : public nsISpeechService, public nsIObserver {
+ public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+  NS_DECL_NSIOBSERVER
+
+  SapiService();
+  bool Init();
+
+  static SapiService* GetInstance();
+  static already_AddRefed<SapiService> GetInstanceForService();
+
+  static void __stdcall SpeechEventCallback(WPARAM aWParam, LPARAM aLParam);
+
+ private:
+  virtual ~SapiService();
+
+  already_AddRefed<ISpVoice> InitSapiInstance();
+  bool RegisterVoices();
+  bool RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry,
+                      const WCHAR* categoryId);
+
+  nsRefPtrHashtable<nsStringHashKey, ISpObjectToken> mVoices;
+  nsTArray<RefPtr<SapiCallback>> mCallbacks;
+
+  bool mInitialized;
+
+  static StaticRefPtr<SapiService> sSingleton;
+};
+
+}  // namespace mozilla::dom
+
+#endif
diff --git a/dom/media/webspeech/synth/windows/components.conf b/dom/media/webspeech/synth/windows/components.conf
new file mode 100644
index 0000000000..bc9b83a43a
--- /dev/null
+++ b/dom/media/webspeech/synth/windows/components.conf
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Classes = [
+    {
+        'cid': '{21b4a45b-9806-4021-a706-d768ab0548f9}',
+        'contract_ids': ['@mozilla.org/synthsapi;1'],
+        'singleton': True,
+        'type': 'mozilla::dom::SapiService',
+        'headers': ['/dom/media/webspeech/synth/windows/SapiService.h'],
+        'constructor': 'mozilla::dom::SapiService::GetInstanceForService',
+        'categories': {"speech-synth-started": 'Sapi Speech Synth'},
+    },
+]
diff --git a/dom/media/webspeech/synth/windows/moz.build b/dom/media/webspeech/synth/windows/moz.build
new file mode 100644
index 0000000000..90bafe9ca7
--- /dev/null
+++ b/dom/media/webspeech/synth/windows/moz.build
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+    "SapiService.cpp",
+]
+
+XPCOM_MANIFESTS += [
+    "components.conf",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+FINAL_LIBRARY = "xul"
--
cgit v1.2.3
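
For reference, and not part of the patch itself: below is a minimal standalone C++ sketch of the rate mapping that SapiService::Speak() performs above. Per the comment in the patch, SAPI expresses rate as an integer from -10 (1/3x) through 0 (normal) to +10 (3x), with each step multiplying or dividing the rate by the 10th root of 3, so the patch maps a WebSpeech rate multiplier with 10 * log10(rate) / log10(3). The helper name ToSapiRate, main(), and the sample values are illustrative assumptions only.

#include <cmath>
#include <cstdio>

// Same formula as the patch: 0 means "rate unchanged"; otherwise a logarithmic
// mapping onto SAPI's -10..+10 scale (e.g. 3.0 -> 10, 2.0 -> 6 after the
// truncating cast to long).
static long ToSapiRate(float aRate) {
  return aRate != 0 ? static_cast<long>(10 * std::log10(aRate) / std::log10(3))
                    : 0;
}

int main() {
  // Sample WebSpeech rate multipliers chosen for illustration.
  const float sampleRates[] = {0.5f, 1.0f, 2.0f, 3.0f};
  for (float r : sampleRates) {
    std::printf("WebSpeech rate %.2f -> SAPI rate %ld\n", r, ToSapiRate(r));
  }
  return 0;
}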