diff options
Diffstat (limited to '')
-rw-r--r-- | dom/media/webspeech/synth/SpeechSynthesis.cpp | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp new file mode 100644 index 0000000000..20e3ef754b --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp @@ -0,0 +1,315 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupportsPrimitives.h" +#include "nsSpeechTask.h" +#include "mozilla/Logging.h" + +#include "mozilla/dom/Element.h" + +#include "mozilla/dom/SpeechSynthesisBinding.h" +#include "mozilla/dom/WindowGlobalChild.h" +#include "SpeechSynthesis.h" +#include "nsContentUtils.h" +#include "nsSynthVoiceRegistry.h" +#include "mozilla/dom/Document.h" +#include "nsIDocShell.h" + +#undef LOG +mozilla::LogModule* GetSpeechSynthLog() { + static mozilla::LazyLogModule sLog("SpeechSynthesis"); + + return sLog; +} +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis) + +NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue) + tmp->mVoiceCache.Clear(); + NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE +NS_IMPL_CYCLE_COLLECTION_UNLINK_END + +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue) + for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) { + cb.NoteXPCOMChild(voice); + } +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) +NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) + +NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper) +NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper) + +SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent) + : DOMEventTargetHelper(aParent), + mHoldQueue(false), + mInnerID(aParent->WindowID()) { + MOZ_ASSERT(NS_IsMainThread()); + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->AddObserver(this, "inner-window-destroyed", true); + obs->AddObserver(this, "synth-voices-changed", true); + } +} + +SpeechSynthesis::~SpeechSynthesis() = default; + +JSObject* SpeechSynthesis::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto); +} + +bool SpeechSynthesis::Pending() const { + // If we don't have any task, nothing is pending. If we have only one task, + // check if that task is currently pending. If we have more than one task, + // then the tasks after the first one are definitely pending. + return mSpeechQueue.Length() > 1 || + (mSpeechQueue.Length() == 1 && + (!mCurrentTask || mCurrentTask->IsPending())); +} + +bool SpeechSynthesis::Speaking() const { + // Check global speaking state if there is no active speaking task. + return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) || + nsSynthVoiceRegistry::GetInstance()->IsSpeaking(); +} + +bool SpeechSynthesis::Paused() const { + return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) || + (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused()); +} + +bool SpeechSynthesis::HasEmptyQueue() const { + return mSpeechQueue.Length() == 0; +} + +bool SpeechSynthesis::HasVoices() const { + uint32_t voiceCount = mVoiceCache.Count(); + if (voiceCount == 0) { + nsresult rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); + if (NS_WARN_IF(NS_FAILED(rv))) { + return false; + } + } + + return voiceCount != 0; +} + +void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) { + if (!mInnerID) { + return; + } + + mSpeechQueue.AppendElement(&aUtterance); + + if (mSpeechQueue.Length() == 1) { + RefPtr<WindowGlobalChild> wgc = + WindowGlobalChild::GetByInnerWindowId(mInnerID); + if (wgc) { + wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); + } + + // If we only have one item in the queue, we aren't pre-paused, and + // we have voices available, speak it. + if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } + } +} + +void SpeechSynthesis::AdvanceQueue() { + LOG(LogLevel::Debug, + ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length())); + + if (mSpeechQueue.IsEmpty()) { + return; + } + + RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0); + + nsAutoString docLang; + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + if (Document* doc = window ? window->GetExtantDoc() : nullptr) { + if (Element* elm = doc->GetHtmlElement()) { + elm->GetLang(docLang); + } + } + + mCurrentTask = + nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang); + + if (mCurrentTask) { + mCurrentTask->SetSpeechSynthesis(this); + } +} + +void SpeechSynthesis::Cancel() { + if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { + // Remove all queued utterances except for current one, we will remove it + // in OnEnd + mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1); + } else { + mSpeechQueue.Clear(); + } + + if (mCurrentTask) { + mCurrentTask->Cancel(); + } +} + +void SpeechSynthesis::Pause() { + if (Paused()) { + return; + } + + if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { + mCurrentTask->Pause(); + } else { + mHoldQueue = true; + } +} + +void SpeechSynthesis::Resume() { + if (!Paused()) { + return; + } + + mHoldQueue = false; + + if (mCurrentTask) { + mCurrentTask->Resume(); + } else { + AdvanceQueue(); + } +} + +void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) { + MOZ_ASSERT(mCurrentTask == aTask); + + if (!mSpeechQueue.IsEmpty()) { + mSpeechQueue.RemoveElementAt(0); + if (mSpeechQueue.IsEmpty()) { + RefPtr<WindowGlobalChild> wgc = + WindowGlobalChild::GetByInnerWindowId(mInnerID); + if (wgc) { + wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); + } + } + } + + mCurrentTask = nullptr; + AdvanceQueue(); +} + +void SpeechSynthesis::GetVoices( + nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) { + aResult.Clear(); + uint32_t voiceCount = 0; + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr; + + if (nsContentUtils::ShouldResistFingerprinting(docShell, + RFPTarget::SpeechSynthesis)) { + return; + } + + nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this); + + for (uint32_t i = 0; i < voiceCount; i++) { + nsAutoString uri; + rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri); + + if (NS_FAILED(rv)) { + NS_WARNING("Failed to retrieve voice from registry"); + continue; + } + + SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri); + + if (!voice) { + voice = new SpeechSynthesisVoice(voiceParent, uri); + } + + aResult.AppendElement(voice); + } + + mVoiceCache.Clear(); + + for (uint32_t i = 0; i < aResult.Length(); i++) { + SpeechSynthesisVoice* voice = aResult[i]; + mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice}); + } +} + +// For testing purposes, allows us to cancel the current task that is +// misbehaving, and flush the queue. +void SpeechSynthesis::ForceEnd() { + if (mCurrentTask) { + mCurrentTask->ForceEnd(); + } +} + +NS_IMETHODIMP +SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + MOZ_ASSERT(NS_IsMainThread()); + + if (strcmp(aTopic, "inner-window-destroyed") == 0) { + nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject); + NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE); + + uint64_t innerID; + nsresult rv = wrapper->GetData(&innerID); + NS_ENSURE_SUCCESS(rv, rv); + + if (innerID == mInnerID) { + mInnerID = 0; + Cancel(); + + nsCOMPtr<nsIObserverService> obs = + mozilla::services::GetObserverService(); + if (obs) { + obs->RemoveObserver(this, "inner-window-destroyed"); + } + } + } else if (strcmp(aTopic, "synth-voices-changed") == 0) { + LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged")); + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr; + + if (!nsContentUtils::ShouldResistFingerprinting( + docShell, RFPTarget::SpeechSynthesis)) { + DispatchTrustedEvent(u"voiceschanged"_ns); + // If we have a pending item, and voices become available, speak it. + if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } + } + } + + return NS_OK; +} + +} // namespace mozilla::dom |