summaryrefslogtreecommitdiffstats
path: root/dom/media/webspeech/synth/SpeechSynthesis.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--dom/media/webspeech/synth/SpeechSynthesis.cpp315
1 files changed, 315 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp
new file mode 100644
index 0000000000..20e3ef754b
--- /dev/null
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -0,0 +1,315 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupportsPrimitives.h"
+#include "nsSpeechTask.h"
+#include "mozilla/Logging.h"
+
+#include "mozilla/dom/Element.h"
+
+#include "mozilla/dom/SpeechSynthesisBinding.h"
+#include "mozilla/dom/WindowGlobalChild.h"
+#include "SpeechSynthesis.h"
+#include "nsContentUtils.h"
+#include "nsSynthVoiceRegistry.h"
+#include "mozilla/dom/Document.h"
+#include "nsIDocShell.h"
+
+#undef LOG
+// Returns the "SpeechSynthesis" log module. The module lives in a
+// function-local static, and the LOG macro defined right after this function
+// sends every message in this file through it.
+mozilla::LogModule* GetSpeechSynthLog() {
+  static mozilla::LazyLogModule sSpeechSynthLog("SpeechSynthesis");
+  return sSpeechSynthLog;
+}
+#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
+
+namespace mozilla::dom {
+
+NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis)
+// Unlink: lets go of mCurrentTask, mSpeechQueue, the voice cache and the weak reference.
+NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis,
+ DOMEventTargetHelper)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue)
+ tmp->mVoiceCache.Clear();  // the cache is emptied by hand rather than through an UNLINK entry
+ NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
+NS_IMPL_CYCLE_COLLECTION_UNLINK_END
+// Traverse: reports mCurrentTask, mSpeechQueue and every cached voice to the callback.
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis,
+ DOMEventTargetHelper)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue)
+ for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) {
+ cb.NoteXPCOMChild(voice);  // one edge per value stored in mVoiceCache
+ }
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
+// Interface map: nsIObserver and nsISupportsWeakReference, then whatever DOMEventTargetHelper maps.
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis)
+ NS_INTERFACE_MAP_ENTRY(nsIObserver)
+ NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
+// AddRef/Release come from the *_INHERITED macros with DOMEventTargetHelper as the base.
+NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
+
+// Constructs the object for the inner window |aParent|: the queue starts
+// un-held, mInnerID is read from aParent->WindowID(), and the object signs up
+// for the two observer topics that Observe() handles. Main thread only.
+SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent)
+    : DOMEventTargetHelper(aParent),
+      mHoldQueue(false),
+      mInnerID(aParent->WindowID()) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  nsCOMPtr<nsIObserverService> observerService =
+      mozilla::services::GetObserverService();
+  if (!observerService) {
+    // No observer service available; nothing to register with.
+    return;
+  }
+
+  // The trailing |true| is the ownsWeak argument of AddObserver.
+  observerService->AddObserver(this, "inner-window-destroyed", true);
+  observerService->AddObserver(this, "synth-voices-changed", true);
+}
+
+SpeechSynthesis::~SpeechSynthesis() = default;  // defaulted: no hand-written teardown code
+
+// Produces the JS reflector for this object by delegating to the generated
+// SpeechSynthesis_Binding::Wrap with the caller's context and prototype.
+JSObject* SpeechSynthesis::WrapObject(JSContext* aCx,
+                                      JS::Handle<JSObject*> aGivenProto) {
+  JSObject* reflector = SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto);
+  return reflector;
+}
+
+// Reports whether any queued utterance has not started yet.
+bool SpeechSynthesis::Pending() const {
+  const auto queuedCount = mSpeechQueue.Length();
+
+  // Anything queued behind the first entry is necessarily still waiting.
+  if (queuedCount > 1) {
+    return true;
+  }
+
+  // Nothing queued means nothing can be pending.
+  if (queuedCount == 0) {
+    return false;
+  }
+
+  // Exactly one entry: it is pending when no task has been created for it
+  // yet, or when its task says so itself.
+  return !mCurrentTask || mCurrentTask->IsPending();
+}
+
+// True when this object has a queued utterance with a speaking task; failing
+// that, the answer comes from the voice registry's global speaking state.
+bool SpeechSynthesis::Speaking() const {
+  if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
+    return true;
+  }
+
+  // No active speaking task here, so consult the registry-wide state.
+  return nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
+}
+
+// Paused means any of: the queue is being held, the current task is
+// pre-paused, or the utterance at the head of the queue reports paused.
+bool SpeechSynthesis::Paused() const {
+  if (mHoldQueue) {
+    return true;
+  }
+
+  if (mCurrentTask && mCurrentTask->IsPrePaused()) {
+    return true;
+  }
+
+  return !mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused();
+}
+
+// True when no utterance is queued.
+bool SpeechSynthesis::HasEmptyQueue() const { return mSpeechQueue.IsEmpty(); }
+
+// Reports whether at least one voice is known. A non-empty local voice cache
+// answers the question directly; otherwise the registry is asked for its
+// voice count, and a failure to read that count is treated as "no voices".
+bool SpeechSynthesis::HasVoices() const {
+  if (mVoiceCache.Count() != 0) {
+    return true;
+  }
+
+  uint32_t registryCount = 0;
+  nsresult rv =
+      nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&registryCount);
+  if (NS_WARN_IF(NS_FAILED(rv))) {
+    return false;
+  }
+
+  return registryCount > 0;
+}
+
+// Appends |aUtterance| to the queue. Does nothing once mInnerID has been
+// reset to 0. When the utterance is the only queue entry, the window is
+// marked as blocking bfcache and, if nothing prevents it, speech starts
+// right away.
+void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) {
+  if (!mInnerID) {
+    return;
+  }
+
+  mSpeechQueue.AppendElement(&aUtterance);
+
+  // Entries queued behind an existing one are picked up later by
+  // AdvanceQueue(); only the first entry needs the work below.
+  if (mSpeechQueue.Length() != 1) {
+    return;
+  }
+
+  if (RefPtr<WindowGlobalChild> wgc =
+          WindowGlobalChild::GetByInnerWindowId(mInnerID)) {
+    wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
+  }
+
+  // Start speaking only when there is no current task, the queue is not
+  // being held, and voices are available.
+  const bool mustWait = mCurrentTask || mHoldQueue;
+  if (!mustWait && HasVoices()) {
+    AdvanceQueue();
+  }
+}
+
+// Hands the utterance at the head of the queue to the voice registry and
+// stores the resulting task in mCurrentTask. The language of the owner
+// document's <html> element, when there is one, is passed along; otherwise
+// the language string stays empty. No-op for an empty queue.
+void SpeechSynthesis::AdvanceQueue() {
+  LOG(LogLevel::Debug,
+      ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length()));
+
+  if (mSpeechQueue.IsEmpty()) {
+    return;
+  }
+
+  RefPtr<SpeechSynthesisUtterance> head = mSpeechQueue.ElementAt(0);
+
+  // Walk owner window -> document -> <html> element, tolerating a missing
+  // link at every step.
+  nsAutoString docLang;
+  nsCOMPtr<nsPIDOMWindowInner> owner = GetOwner();
+  Document* doc = owner ? owner->GetExtantDoc() : nullptr;
+  Element* root = doc ? doc->GetHtmlElement() : nullptr;
+  if (root) {
+    root->GetLang(docLang);
+  }
+
+  mCurrentTask =
+      nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*head, docLang);
+
+  // SpeakUtterance may yield no task; only a real task gets the back-pointer.
+  if (mCurrentTask) {
+    mCurrentTask->SetSpeechSynthesis(this);
+  }
+}
+
+// Cancels all speech for this object.
+//
+// While an utterance is being spoken, only the entries queued behind it are
+// dropped; the head entry stays so that OnEnd() can remove it. Otherwise the
+// whole queue is emptied here. In that second case OnEnd() either never runs
+// (there is no current task) or finds the queue already empty and skips its
+// bfcache bookkeeping, so the HAS_ACTIVE_SPEECH_SYNTHESIS block taken in
+// Speak() is released here; without that it would never be lifted.
+void SpeechSynthesis::Cancel() {
+  if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
+    // Remove all queued utterances except for current one, we will remove it
+    // in OnEnd
+    mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1);
+  } else if (!mSpeechQueue.IsEmpty()) {
+    mSpeechQueue.Clear();
+
+    // The queue just went from non-empty to empty outside of OnEnd(), so
+    // undo the bfcache block that Speak() installed for it.
+    if (RefPtr<WindowGlobalChild> wgc =
+            WindowGlobalChild::GetByInnerWindowId(mInnerID)) {
+      wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
+    }
+  }
+
+  // Keep the task alive for the duration of the call: OnEnd() resets
+  // mCurrentTask to null. NOTE(review): whether Cancel() can reach OnEnd()
+  // synchronously is not visible from this file; the local reference makes
+  // either case safe.
+  if (RefPtr<nsSpeechTask> task = mCurrentTask) {
+    task->Cancel();
+  }
+}
+
+// Pauses speech. If something is being spoken right now, the current task is
+// paused; otherwise the queue is put on hold so nothing new starts. Already
+// paused is a no-op.
+void SpeechSynthesis::Pause() {
+  if (Paused()) {
+    return;
+  }
+
+  const bool speakingNow = !mSpeechQueue.IsEmpty() && HasSpeakingTask();
+  if (!speakingNow) {
+    mHoldQueue = true;
+    return;
+  }
+
+  mCurrentTask->Pause();
+}
+
+// Undoes Pause(): releases the queue hold, then either resumes the current
+// task or, when there is none, starts the next queued utterance. No-op when
+// not paused.
+void SpeechSynthesis::Resume() {
+  if (!Paused()) {
+    return;
+  }
+
+  mHoldQueue = false;
+
+  if (!mCurrentTask) {
+    AdvanceQueue();
+    return;
+  }
+
+  mCurrentTask->Resume();
+}
+
+// Called when |aTask|, which must be the current task, has ended. Drops the
+// head of the queue, lifts the bfcache block if that emptied the queue,
+// forgets the task and moves on to the next utterance.
+void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) {
+  MOZ_ASSERT(mCurrentTask == aTask);
+
+  const bool hadQueuedUtterance = !mSpeechQueue.IsEmpty();
+  if (hadQueuedUtterance) {
+    mSpeechQueue.RemoveElementAt(0);
+  }
+
+  // Only a removal that drained the queue releases the bfcache block.
+  if (hadQueuedUtterance && mSpeechQueue.IsEmpty()) {
+    if (RefPtr<WindowGlobalChild> wgc =
+            WindowGlobalChild::GetByInnerWindowId(mInnerID)) {
+      wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
+    }
+  }
+
+  mCurrentTask = nullptr;
+  AdvanceQueue();
+}
+
+// Fills |aResult| with one SpeechSynthesisVoice per voice the registry
+// reports, and rebuilds mVoiceCache so that it holds exactly those voices.
+// |aResult| is always cleared first; it stays empty when fingerprinting
+// resistance applies to the owner's docshell or the voice count cannot be
+// read.
+void SpeechSynthesis::GetVoices(
+    nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) {
+  aResult.Clear();
+
+  nsCOMPtr<nsPIDOMWindowInner> owner = GetOwner();
+  nsCOMPtr<nsIDocShell> shell = owner ? owner->GetDocShell() : nullptr;
+  if (nsContentUtils::ShouldResistFingerprinting(shell,
+                                                 RFPTarget::SpeechSynthesis)) {
+    return;
+  }
+
+  uint32_t registryCount = 0;
+  nsresult rv =
+      nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&registryCount);
+  if (NS_WARN_IF(NS_FAILED(rv))) {
+    return;
+  }
+
+  nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
+
+  for (uint32_t index = 0; index < registryCount; ++index) {
+    nsAutoString uri;
+    rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(index, uri);
+    if (NS_FAILED(rv)) {
+      NS_WARNING("Failed to retrieve voice from registry");
+      continue;
+    }
+
+    // Hand out the cached object for this URI when there is one, so repeated
+    // calls return the same voice objects; otherwise create a new one.
+    SpeechSynthesisVoice* cached = mVoiceCache.GetWeak(uri);
+    aResult.AppendElement(
+        cached ? cached : new SpeechSynthesisVoice(voiceParent, uri));
+  }
+
+  // Rebuild the cache from the result. aResult already holds references to
+  // the reused voices, so clearing the cache first does not destroy them.
+  mVoiceCache.Clear();
+  for (SpeechSynthesisVoice* voice : aResult) {
+    mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice});
+  }
+}
+
+// Testing hook: lets a test get rid of a misbehaving current task by
+// force-ending it. Does nothing when there is no current task.
+void SpeechSynthesis::ForceEnd() {
+  if (!mCurrentTask) {
+    return;
+  }
+
+  mCurrentTask->ForceEnd();
+}
+
+// nsIObserver entry point. Handles two topics:
+//  - "inner-window-destroyed": when the destroyed window's ID matches
+//    mInnerID, resets mInnerID to 0, cancels all speech and stops observing
+//    this topic.
+//  - "synth-voices-changed": unless fingerprinting resistance applies, fires
+//    a trusted "voiceschanged" event and starts a waiting utterance if one
+//    can now be spoken.
+// Returns NS_ERROR_FAILURE when the subject of a window-destroyed
+// notification is not an nsISupportsPRUint64, the failing nsresult when its
+// value cannot be read, and NS_OK otherwise (including for unknown topics).
+NS_IMETHODIMP
+SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic,
+                         const char16_t* aData) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!strcmp(aTopic, "inner-window-destroyed")) {
+    nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject);
+    NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE);
+
+    uint64_t destroyedID;
+    nsresult rv = wrapper->GetData(&destroyedID);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    // Notifications about other windows are of no interest.
+    if (destroyedID != mInnerID) {
+      return NS_OK;
+    }
+
+    mInnerID = 0;
+    Cancel();
+
+    nsCOMPtr<nsIObserverService> observerService =
+        mozilla::services::GetObserverService();
+    if (observerService) {
+      observerService->RemoveObserver(this, "inner-window-destroyed");
+    }
+    return NS_OK;
+  }
+
+  if (strcmp(aTopic, "synth-voices-changed") != 0) {
+    return NS_OK;
+  }
+
+  LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
+  nsCOMPtr<nsPIDOMWindowInner> owner = GetOwner();
+  nsCOMPtr<nsIDocShell> shell = owner ? owner->GetDocShell() : nullptr;
+
+  if (nsContentUtils::ShouldResistFingerprinting(shell,
+                                                 RFPTarget::SpeechSynthesis)) {
+    return NS_OK;
+  }
+
+  DispatchTrustedEvent(u"voiceschanged"_ns);
+
+  // If we have a pending item, and voices become available, speak it.
+  const bool mustWait = mCurrentTask || mHoldQueue;
+  if (!mustWait && HasVoices()) {
+    AdvanceQueue();
+  }
+
+  return NS_OK;
+}
+
+} // namespace mozilla::dom