diff options
Diffstat (limited to 'dom/media/webspeech/synth')
68 files changed, 7057 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp new file mode 100644 index 0000000000..f26e36a3f4 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp @@ -0,0 +1,333 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupportsPrimitives.h" +#include "nsSpeechTask.h" +#include "mozilla/Logging.h" + +#include "mozilla/dom/Element.h" + +#include "mozilla/dom/SpeechSynthesisBinding.h" +#include "mozilla/dom/WindowGlobalChild.h" +#include "SpeechSynthesis.h" +#include "nsContentUtils.h" +#include "nsSynthVoiceRegistry.h" +#include "mozilla/dom/Document.h" +#include "nsIDocShell.h" + +#undef LOG +mozilla::LogModule* GetSpeechSynthLog() { + static mozilla::LazyLogModule sLog("SpeechSynthesis"); + + return sLog; +} +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis) + +NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue) + tmp->mVoiceCache.Clear(); + NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE +NS_IMPL_CYCLE_COLLECTION_UNLINK_END + +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue) + for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) { + cb.NoteXPCOMChild(voice); + } +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + 
NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) +NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) + +NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper) +NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper) + +SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent) + : DOMEventTargetHelper(aParent), + mHoldQueue(false), + mInnerID(aParent->WindowID()) { + MOZ_ASSERT(NS_IsMainThread()); + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->AddObserver(this, "inner-window-destroyed", true); + obs->AddObserver(this, "synth-voices-changed", true); + obs->AddObserver(this, "synth-voices-error", true); + } +} + +SpeechSynthesis::~SpeechSynthesis() = default; + +JSObject* SpeechSynthesis::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto); +} + +bool SpeechSynthesis::Pending() const { + // If we don't have any task, nothing is pending. If we have only one task, + // check if that task is currently pending. If we have more than one task, + // then the tasks after the first one are definitely pending. + return mSpeechQueue.Length() > 1 || + (mSpeechQueue.Length() == 1 && + (!mCurrentTask || mCurrentTask->IsPending())); +} + +bool SpeechSynthesis::Speaking() const { + // Check global speaking state if there is no active speaking task. 
+ return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) || + nsSynthVoiceRegistry::GetInstance()->IsSpeaking(); +} + +bool SpeechSynthesis::Paused() const { + return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) || + (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused()); +} + +bool SpeechSynthesis::HasEmptyQueue() const { + return mSpeechQueue.Length() == 0; +} + +bool SpeechSynthesis::HasVoices() const { + uint32_t voiceCount = mVoiceCache.Count(); + if (voiceCount == 0) { + nsresult rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); + if (NS_WARN_IF(NS_FAILED(rv))) { + return false; + } + } + + return voiceCount != 0; +} + +void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) { + if (!mInnerID) { + return; + } + + mSpeechQueue.AppendElement(&aUtterance); + + if (mSpeechQueue.Length() == 1) { + RefPtr<WindowGlobalChild> wgc = + WindowGlobalChild::GetByInnerWindowId(mInnerID); + if (wgc) { + wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); + } + + // If we only have one item in the queue, we aren't pre-paused, and + // we have voices available, speak it. + if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } + } +} + +void SpeechSynthesis::AdvanceQueue() { + LOG(LogLevel::Debug, + ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length())); + + if (mSpeechQueue.IsEmpty()) { + return; + } + + RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0); + + nsAutoString docLang; + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + if (Document* doc = window ? 
window->GetExtantDoc() : nullptr) { + if (Element* elm = doc->GetHtmlElement()) { + elm->GetLang(docLang); + } + } + + mCurrentTask = + nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang); + + if (mCurrentTask) { + mCurrentTask->SetSpeechSynthesis(this); + } +} + +void SpeechSynthesis::Cancel() { + if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { + // Remove all queued utterances except for current one, we will remove it + // in OnEnd + mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1); + } else { + mSpeechQueue.Clear(); + } + + if (mCurrentTask) { + mCurrentTask->Cancel(); + } +} + +void SpeechSynthesis::Pause() { + if (Paused()) { + return; + } + + if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { + mCurrentTask->Pause(); + } else { + mHoldQueue = true; + } +} + +void SpeechSynthesis::Resume() { + if (!Paused()) { + return; + } + + mHoldQueue = false; + + if (mCurrentTask) { + mCurrentTask->Resume(); + } else { + AdvanceQueue(); + } +} + +void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) { + MOZ_ASSERT(mCurrentTask == aTask); + + if (!mSpeechQueue.IsEmpty()) { + mSpeechQueue.RemoveElementAt(0); + if (mSpeechQueue.IsEmpty()) { + RefPtr<WindowGlobalChild> wgc = + WindowGlobalChild::GetByInnerWindowId(mInnerID); + if (wgc) { + wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); + } + } + } + + mCurrentTask = nullptr; + AdvanceQueue(); +} + +void SpeechSynthesis::GetVoices( + nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) { + aResult.Clear(); + uint32_t voiceCount = 0; + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + nsCOMPtr<nsIDocShell> docShell = window ? 
window->GetDocShell() : nullptr; + + if (nsContentUtils::ShouldResistFingerprinting(docShell, + RFPTarget::SpeechSynthesis)) { + return; + } + + nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this); + + for (uint32_t i = 0; i < voiceCount; i++) { + nsAutoString uri; + rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri); + + if (NS_FAILED(rv)) { + NS_WARNING("Failed to retrieve voice from registry"); + continue; + } + + SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri); + + if (!voice) { + voice = new SpeechSynthesisVoice(voiceParent, uri); + } + + aResult.AppendElement(voice); + } + + mVoiceCache.Clear(); + + for (uint32_t i = 0; i < aResult.Length(); i++) { + SpeechSynthesisVoice* voice = aResult[i]; + mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice}); + } +} + +// For testing purposes, allows us to cancel the current task that is +// misbehaving, and flush the queue. +void SpeechSynthesis::ForceEnd() { + if (mCurrentTask) { + mCurrentTask->ForceEnd(); + } +} + +NS_IMETHODIMP +SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + MOZ_ASSERT(NS_IsMainThread()); + + if (strcmp(aTopic, "inner-window-destroyed") == 0) { + nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject); + NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE); + + uint64_t innerID; + nsresult rv = wrapper->GetData(&innerID); + NS_ENSURE_SUCCESS(rv, rv); + + if (innerID == mInnerID) { + mInnerID = 0; + Cancel(); + + nsCOMPtr<nsIObserverService> obs = + mozilla::services::GetObserverService(); + if (obs) { + obs->RemoveObserver(this, "inner-window-destroyed"); + } + } + } else if (strcmp(aTopic, "synth-voices-changed") == 0) { + LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged")); + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + nsCOMPtr<nsIDocShell> docShell = window ? 
window->GetDocShell() : nullptr; + + if (!nsContentUtils::ShouldResistFingerprinting( + docShell, RFPTarget::SpeechSynthesis)) { + DispatchTrustedEvent(u"voiceschanged"_ns); + // If we have a pending item, and voices become available, speak it. + if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } + } + } else if (strcmp(aTopic, "synth-voices-error") == 0) { + NS_WARNING("SpeechSynthesis::Observe: synth-voices-error"); + LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceserror")); + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + + nsCOMPtr<nsIObserverService> obs = services::GetObserverService(); + if (obs) { + obs->NotifyObservers(window, "chrome-synth-voices-error", aData); + } + + if (!mSpeechQueue.IsEmpty()) { + for (RefPtr<SpeechSynthesisUtterance>& utterance : mSpeechQueue) { + utterance->DispatchSpeechSynthesisEvent(u"error"_ns, 0, nullptr, 0, + u""_ns); + } + mSpeechQueue.Clear(); + } + } + + return NS_OK; +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/SpeechSynthesis.h b/dom/media/webspeech/synth/SpeechSynthesis.h new file mode 100644 index 0000000000..1227261b59 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesis.h @@ -0,0 +1,88 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesis_h +#define mozilla_dom_SpeechSynthesis_h + +#include "nsCOMPtr.h" +#include "nsIObserver.h" +#include "nsRefPtrHashtable.h" +#include "nsString.h" +#include "nsWeakReference.h" +#include "nsWrapperCache.h" +#include "js/TypeDecls.h" + +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" + +class nsIDOMWindow; + +namespace mozilla::dom { + +class nsSpeechTask; + +class SpeechSynthesis final : public DOMEventTargetHelper, + public nsIObserver, + public nsSupportsWeakReference { + public: + explicit SpeechSynthesis(nsPIDOMWindowInner* aParent); + + NS_DECL_ISUPPORTS_INHERITED + NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_DECL_NSIOBSERVER + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + bool Pending() const; + + bool Speaking() const; + + bool Paused() const; + + bool HasEmptyQueue() const; + + void Speak(SpeechSynthesisUtterance& aUtterance); + + void Cancel(); + + void Pause(); + + void Resume(); + + void OnEnd(const nsSpeechTask* aTask); + + void GetVoices(nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult); + + void ForceEnd(); + + IMPL_EVENT_HANDLER(voiceschanged) + + private: + virtual ~SpeechSynthesis(); + + void AdvanceQueue(); + + bool HasVoices() const; + + bool HasSpeakingTask() const { + return mCurrentTask && mCurrentTask->IsSpeaking(); + } + + nsTArray<RefPtr<SpeechSynthesisUtterance> > mSpeechQueue; + + RefPtr<nsSpeechTask> mCurrentTask; + + nsRefPtrHashtable<nsStringHashKey, SpeechSynthesisVoice> mVoiceCache; + + bool mHoldQueue; + + uint64_t mInnerID; +}; + +} // namespace mozilla::dom +#endif diff --git a/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp b/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp new file mode 100644 index 0000000000..4d8dcd5c12 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp @@ -0,0 +1,137 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: 
nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsCOMPtr.h" +#include "nsCycleCollectionParticipant.h" +#include "nsGkAtoms.h" + +#include "mozilla/dom/SpeechSynthesisEvent.h" +#include "mozilla/dom/SpeechSynthesisUtteranceBinding.h" +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" + +#include <stdlib.h> + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_INHERITED(SpeechSynthesisUtterance, + DOMEventTargetHelper, mVoice); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesisUtterance) +NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) + +NS_IMPL_ADDREF_INHERITED(SpeechSynthesisUtterance, DOMEventTargetHelper) +NS_IMPL_RELEASE_INHERITED(SpeechSynthesisUtterance, DOMEventTargetHelper) + +SpeechSynthesisUtterance::SpeechSynthesisUtterance( + nsPIDOMWindowInner* aOwnerWindow, const nsAString& text) + : DOMEventTargetHelper(aOwnerWindow), + mText(text), + mVolume(1), + mRate(1), + mPitch(1), + mPaused(false), + mShouldResistFingerprinting( + aOwnerWindow->AsGlobal()->ShouldResistFingerprinting( + RFPTarget::SpeechSynthesis)) {} + +SpeechSynthesisUtterance::~SpeechSynthesisUtterance() = default; + +JSObject* SpeechSynthesisUtterance::WrapObject( + JSContext* aCx, JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesisUtterance_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* SpeechSynthesisUtterance::GetParentObject() const { + return GetOwner(); +} + +already_AddRefed<SpeechSynthesisUtterance> +SpeechSynthesisUtterance::Constructor(GlobalObject& aGlobal, ErrorResult& aRv) { + return Constructor(aGlobal, u""_ns, aRv); +} + +already_AddRefed<SpeechSynthesisUtterance> +SpeechSynthesisUtterance::Constructor(GlobalObject& aGlobal, + const nsAString& aText, + ErrorResult& aRv) { + 
nsCOMPtr<nsPIDOMWindowInner> win = do_QueryInterface(aGlobal.GetAsSupports()); + + if (!win) { + aRv.Throw(NS_ERROR_FAILURE); + return nullptr; + } + + RefPtr<SpeechSynthesisUtterance> object = + new SpeechSynthesisUtterance(win, aText); + return object.forget(); +} + +void SpeechSynthesisUtterance::GetText(nsString& aResult) const { + aResult = mText; +} + +void SpeechSynthesisUtterance::SetText(const nsAString& aText) { + mText = aText; +} + +void SpeechSynthesisUtterance::GetLang(nsString& aResult) const { + aResult = mLang; +} + +void SpeechSynthesisUtterance::SetLang(const nsAString& aLang) { + mLang = aLang; +} + +SpeechSynthesisVoice* SpeechSynthesisUtterance::GetVoice() const { + return mVoice; +} + +void SpeechSynthesisUtterance::SetVoice(SpeechSynthesisVoice* aVoice) { + mVoice = aVoice; +} + +float SpeechSynthesisUtterance::Volume() const { return mVolume; } + +void SpeechSynthesisUtterance::SetVolume(float aVolume) { + mVolume = std::max<float>(std::min<float>(aVolume, 1), 0); +} + +float SpeechSynthesisUtterance::Rate() const { return mRate; } + +void SpeechSynthesisUtterance::SetRate(float aRate) { + mRate = std::max<float>(std::min<float>(aRate, 10), 0.1f); +} + +float SpeechSynthesisUtterance::Pitch() const { return mPitch; } + +void SpeechSynthesisUtterance::SetPitch(float aPitch) { + mPitch = std::max<float>(std::min<float>(aPitch, 2), 0); +} + +void SpeechSynthesisUtterance::GetChosenVoiceURI(nsString& aResult) const { + aResult = mChosenVoiceURI; +} + +void SpeechSynthesisUtterance::DispatchSpeechSynthesisEvent( + const nsAString& aEventType, uint32_t aCharIndex, + const Nullable<uint32_t>& aCharLength, float aElapsedTime, + const nsAString& aName) { + SpeechSynthesisEventInit init; + init.mBubbles = false; + init.mCancelable = false; + init.mUtterance = this; + init.mCharIndex = aCharIndex; + init.mCharLength = aCharLength; + init.mElapsedTime = aElapsedTime; + init.mName = aName; + + RefPtr<SpeechSynthesisEvent> event = + 
SpeechSynthesisEvent::Constructor(this, aEventType, init); + DispatchTrustedEvent(event); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/SpeechSynthesisUtterance.h b/dom/media/webspeech/synth/SpeechSynthesisUtterance.h new file mode 100644 index 0000000000..17958a3b32 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisUtterance.h @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisUtterance_h +#define mozilla_dom_SpeechSynthesisUtterance_h + +#include "mozilla/DOMEventTargetHelper.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "js/TypeDecls.h" + +#include "nsSpeechTask.h" + +namespace mozilla::dom { + +class SpeechSynthesisVoice; +class SpeechSynthesis; +class nsSynthVoiceRegistry; + +class SpeechSynthesisUtterance final : public DOMEventTargetHelper { + friend class SpeechSynthesis; + friend class nsSpeechTask; + friend class nsSynthVoiceRegistry; + + public: + SpeechSynthesisUtterance(nsPIDOMWindowInner* aOwnerWindow, + const nsAString& aText); + + NS_DECL_ISUPPORTS_INHERITED + NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechSynthesisUtterance, + DOMEventTargetHelper) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + static already_AddRefed<SpeechSynthesisUtterance> Constructor( + GlobalObject& aGlobal, ErrorResult& aRv); + static already_AddRefed<SpeechSynthesisUtterance> Constructor( + GlobalObject& aGlobal, const nsAString& aText, ErrorResult& aRv); + + void GetText(nsString& aResult) const; + + void SetText(const nsAString& aText); + + void GetLang(nsString& aResult) const; + + void SetLang(const 
nsAString& aLang); + + SpeechSynthesisVoice* GetVoice() const; + + void SetVoice(SpeechSynthesisVoice* aVoice); + + float Volume() const; + + void SetVolume(float aVolume); + + float Rate() const; + + void SetRate(float aRate); + + float Pitch() const; + + void SetPitch(float aPitch); + + void GetChosenVoiceURI(nsString& aResult) const; + + bool IsPaused() { return mPaused; } + + bool ShouldResistFingerprinting() const { + return mShouldResistFingerprinting; + } + + IMPL_EVENT_HANDLER(start) + IMPL_EVENT_HANDLER(end) + IMPL_EVENT_HANDLER(error) + IMPL_EVENT_HANDLER(pause) + IMPL_EVENT_HANDLER(resume) + IMPL_EVENT_HANDLER(mark) + IMPL_EVENT_HANDLER(boundary) + + private: + virtual ~SpeechSynthesisUtterance(); + + void DispatchSpeechSynthesisEvent(const nsAString& aEventType, + uint32_t aCharIndex, + const Nullable<uint32_t>& aCharLength, + float aElapsedTime, const nsAString& aName); + + nsString mText; + + nsString mLang; + + float mVolume; + + float mRate; + + float mPitch; + + nsString mChosenVoiceURI; + + bool mPaused; + + RefPtr<SpeechSynthesisVoice> mVoice; + + bool mShouldResistFingerprinting; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp b/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp new file mode 100644 index 0000000000..a309daca26 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechSynthesis.h" +#include "nsSynthVoiceRegistry.h" +#include "mozilla/dom/SpeechSynthesisVoiceBinding.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechSynthesisVoice, mParent) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechSynthesisVoice) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechSynthesisVoice) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesisVoice) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechSynthesisVoice::SpeechSynthesisVoice(nsISupports* aParent, + const nsAString& aUri) + : mParent(aParent), mUri(aUri) {} + +SpeechSynthesisVoice::~SpeechSynthesisVoice() = default; + +JSObject* SpeechSynthesisVoice::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesisVoice_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* SpeechSynthesisVoice::GetParentObject() const { return mParent; } + +void SpeechSynthesisVoice::GetVoiceURI(nsString& aRetval) const { + aRetval = mUri; +} + +void SpeechSynthesisVoice::GetName(nsString& aRetval) const { + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceName(mUri, aRetval); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.name"); +} + +void SpeechSynthesisVoice::GetLang(nsString& aRetval) const { + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceLang(mUri, aRetval); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.lang"); +} + +bool SpeechSynthesisVoice::LocalService() const { + bool isLocal; + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->IsLocalVoice(mUri, &isLocal); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.localService"); + + return isLocal; +} + +bool SpeechSynthesisVoice::Default() const { + bool isDefault; + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->IsDefaultVoice(mUri, &isDefault); + 
NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.default"); + + return isDefault; +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/SpeechSynthesisVoice.h b/dom/media/webspeech/synth/SpeechSynthesisVoice.h new file mode 100644 index 0000000000..079e5f49ea --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisVoice.h @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisVoice_h +#define mozilla_dom_SpeechSynthesisVoice_h + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsWrapperCache.h" +#include "js/TypeDecls.h" + +namespace mozilla::dom { + +class nsSynthVoiceRegistry; +class SpeechSynthesis; + +class SpeechSynthesisVoice final : public nsISupports, public nsWrapperCache { + friend class nsSynthVoiceRegistry; + friend class SpeechSynthesis; + + public: + SpeechSynthesisVoice(nsISupports* aParent, const nsAString& aUri); + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechSynthesisVoice) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + void GetVoiceURI(nsString& aRetval) const; + + void GetName(nsString& aRetval) const; + + void GetLang(nsString& aRetval) const; + + bool LocalService() const; + + bool Default() const; + + private: + virtual ~SpeechSynthesisVoice(); + + nsCOMPtr<nsISupports> mParent; + + nsString mUri; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/android/SpeechSynthesisService.cpp b/dom/media/webspeech/synth/android/SpeechSynthesisService.cpp new file mode 100644 index 0000000000..1b6e4b6125 --- 
/dev/null +++ b/dom/media/webspeech/synth/android/SpeechSynthesisService.cpp @@ -0,0 +1,215 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechSynthesisService.h" + +#include <android/log.h> + +#include "nsXULAppAPI.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/jni/Utils.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_media.h" + +#define ALOG(args...) \ + __android_log_print(ANDROID_LOG_INFO, "GeckoSpeechSynthesis", ##args) + +namespace mozilla { +namespace dom { + +StaticRefPtr<SpeechSynthesisService> SpeechSynthesisService::sSingleton; + +class AndroidSpeechCallback final : public nsISpeechTaskCallback { + public: + AndroidSpeechCallback() {} + + NS_DECL_ISUPPORTS + + NS_IMETHOD OnResume() override { return NS_OK; } + + NS_IMETHOD OnPause() override { return NS_OK; } + + NS_IMETHOD OnCancel() override { + java::SpeechSynthesisService::Stop(); + return NS_OK; + } + + NS_IMETHOD OnVolumeChanged(float aVolume) override { return NS_OK; } + + private: + ~AndroidSpeechCallback() {} +}; + +NS_IMPL_ISUPPORTS(AndroidSpeechCallback, nsISpeechTaskCallback) + +NS_IMPL_ISUPPORTS(SpeechSynthesisService, nsISpeechService) + +void SpeechSynthesisService::Setup() { + ALOG("SpeechSynthesisService::Setup"); + + if (!StaticPrefs::media_webspeech_synth_enabled() || + Preferences::GetBool("media.webspeech.synth.test")) { + return; + } + + if (!jni::IsAvailable()) { + NS_WARNING("Failed to initialize speech synthesis"); + return; + } + + Init(); + java::SpeechSynthesisService::InitSynth(); +} + +// nsISpeechService + +NS_IMETHODIMP +SpeechSynthesisService::Speak(const nsAString& aText, const nsAString& aUri, + 
float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) { + if (mTask) { + NS_WARNING("Service only supports one speech task at a time."); + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<AndroidSpeechCallback> callback = new AndroidSpeechCallback(); + nsresult rv = aTask->Setup(callback); + + if (NS_FAILED(rv)) { + return rv; + } + + jni::String::LocalRef utteranceId = + java::SpeechSynthesisService::Speak(aUri, aText, aRate, aPitch, aVolume); + if (!utteranceId) { + return NS_ERROR_NOT_AVAILABLE; + } + + mTaskUtteranceId = utteranceId->ToCString(); + mTask = aTask; + mTaskTextLength = aText.Length(); + mTaskTextOffset = 0; + + return NS_OK; +} + +SpeechSynthesisService* SpeechSynthesisService::GetInstance(bool aCreate) { + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT( + false, + "SpeechSynthesisService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton && aCreate) { + sSingleton = new SpeechSynthesisService(); + sSingleton->Setup(); + ClearOnShutdown(&sSingleton); + } + + return sSingleton; +} + +already_AddRefed<SpeechSynthesisService> +SpeechSynthesisService::GetInstanceForService() { + MOZ_ASSERT(NS_IsMainThread()); + RefPtr<SpeechSynthesisService> sapiService = GetInstance(); + return sapiService.forget(); +} + +// JNI + +void SpeechSynthesisService::RegisterVoice(jni::String::Param aUri, + jni::String::Param aName, + jni::String::Param aLocale, + bool aIsNetwork, bool aIsDefault) { + nsSynthVoiceRegistry* registry = nsSynthVoiceRegistry::GetInstance(); + SpeechSynthesisService* service = SpeechSynthesisService::GetInstance(false); + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. 
+ DebugOnly<nsresult> rv = + registry->AddVoice(service, aUri->ToString(), aName->ToString(), + aLocale->ToString(), !aIsNetwork, true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); + + if (aIsDefault) { + DebugOnly<nsresult> rv = registry->SetDefaultVoice(aUri->ToString(), true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to set voice as default"); + } +} + +void SpeechSynthesisService::DoneRegisteringVoices() { + nsSynthVoiceRegistry* registry = nsSynthVoiceRegistry::GetInstance(); + registry->NotifyVoicesChanged(); +} + +void SpeechSynthesisService::DispatchStart(jni::String::Param aUtteranceId) { + if (sSingleton) { + MOZ_ASSERT(sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + if (task) { + sSingleton->mTaskStartTime = TimeStamp::Now(); + DebugOnly<nsresult> rv = task->DispatchStart(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch start"); + } + } +} + +void SpeechSynthesisService::DispatchEnd(jni::String::Param aUtteranceId) { + if (sSingleton) { + // In API older than 23, we will sometimes call this function + // without providing an utterance ID. 
+ MOZ_ASSERT(!aUtteranceId || + sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + sSingleton->mTask = nullptr; + if (task) { + TimeStamp startTime = sSingleton->mTaskStartTime; + DebugOnly<nsresult> rv = + task->DispatchEnd((TimeStamp::Now() - startTime).ToSeconds(), + sSingleton->mTaskTextLength); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch start"); + } + } +} + +void SpeechSynthesisService::DispatchError(jni::String::Param aUtteranceId) { + if (sSingleton) { + MOZ_ASSERT(sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + sSingleton->mTask = nullptr; + if (task) { + TimeStamp startTime = sSingleton->mTaskStartTime; + DebugOnly<nsresult> rv = + task->DispatchError((TimeStamp::Now() - startTime).ToSeconds(), + sSingleton->mTaskTextOffset); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch start"); + } + } +} + +void SpeechSynthesisService::DispatchBoundary(jni::String::Param aUtteranceId, + int32_t aStart, int32_t aEnd) { + if (sSingleton) { + MOZ_ASSERT(sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + if (task) { + TimeStamp startTime = sSingleton->mTaskStartTime; + sSingleton->mTaskTextOffset = aStart; + DebugOnly<nsresult> rv = task->DispatchBoundary( + u"word"_ns, (TimeStamp::Now() - startTime).ToSeconds(), aStart, + aEnd - aStart, 1); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch boundary"); + } + } +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/android/SpeechSynthesisService.h b/dom/media/webspeech/synth/android/SpeechSynthesisService.h new file mode 100644 index 0000000000..98c5143cf6 --- /dev/null +++ b/dom/media/webspeech/synth/android/SpeechSynthesisService.h @@ -0,0 +1,68 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set 
ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisService_h +#define mozilla_dom_SpeechSynthesisService_h + +#include "nsISpeechService.h" +#include "mozilla/java/SpeechSynthesisServiceNatives.h" +#include "mozilla/StaticPtr.h" + +namespace mozilla { +namespace dom { + +class SpeechSynthesisService final + : public nsISpeechService, + public java::SpeechSynthesisService::Natives<SpeechSynthesisService> { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + + SpeechSynthesisService(){}; + + void Setup(); + + static void DoneRegisteringVoices(); + + static void RegisterVoice(jni::String::Param aUri, jni::String::Param aName, + jni::String::Param aLocale, bool aIsNetwork, + bool aIsDefault); + + static void DispatchStart(jni::String::Param aUtteranceId); + + static void DispatchEnd(jni::String::Param aUtteranceId); + + static void DispatchError(jni::String::Param aUtteranceId); + + static void DispatchBoundary(jni::String::Param aUtteranceId, int32_t aStart, + int32_t aEnd); + + static SpeechSynthesisService* GetInstance(bool aCreate = true); + static already_AddRefed<SpeechSynthesisService> GetInstanceForService(); + + static StaticRefPtr<SpeechSynthesisService> sSingleton; + + private: + virtual ~SpeechSynthesisService(){}; + + nsCOMPtr<nsISpeechTask> mTask; + + // Unique ID assigned to utterance when it is sent to system service. + nsCString mTaskUtteranceId; + + // Time stamp from the moment the utterance is started. + TimeStamp mTaskStartTime; + + // Length of text of the utterance. + uint32_t mTaskTextLength; + + // Current offset in characters of what has been spoken. 
+ uint32_t mTaskTextOffset; +}; + +} // namespace dom +} // namespace mozilla +#endif diff --git a/dom/media/webspeech/synth/android/components.conf b/dom/media/webspeech/synth/android/components.conf new file mode 100644 index 0000000000..4c35954fcc --- /dev/null +++ b/dom/media/webspeech/synth/android/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{311b2dab-f4d3-4be4-8123-6732313d95c2}', + 'contract_ids': ['@mozilla.org/androidspeechsynth;1'], + 'singleton': True, + 'type': 'mozilla::dom::SpeechSynthesisService', + 'headers': ['/dom/media/webspeech/synth/android/SpeechSynthesisService.h'], + 'constructor': 'mozilla::dom::SpeechSynthesisService::GetInstanceForService', + 'categories': {"speech-synth-started": 'Android Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/android/moz.build b/dom/media/webspeech/synth/android/moz.build new file mode 100644 index 0000000000..348c157f3c --- /dev/null +++ b/dom/media/webspeech/synth/android/moz.build @@ -0,0 +1,19 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+EXPORTS.mozilla.dom += ["SpeechSynthesisService.h"]
+
+UNIFIED_SOURCES += [
+    "SpeechSynthesisService.cpp",
+]
+
+XPCOM_MANIFESTS += [
+    "components.conf",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+FINAL_LIBRARY = "xul"
diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h
new file mode 100644
index 0000000000..6148d59c92
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_OsxSpeechSynthesizerService_h
+#define mozilla_dom_OsxSpeechSynthesizerService_h
+
+#include "nsISpeechService.h"
+#include "nsIObserver.h"
+#include "mozilla/StaticPtr.h"
+
+namespace mozilla {
+namespace dom {
+// macOS speech-synthesis backend (nsISpeechService + nsIObserver); one shared instance is kept in sSingleton.
+class OSXSpeechSynthesizerService final : public nsISpeechService,
+                                          public nsIObserver {
+ public:
+  NS_DECL_THREADSAFE_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+  NS_DECL_NSIOBSERVER
+  // Returns false when the OS backend must not be used or cannot start; true once voice enumeration was queued.
+  bool Init();
+  // Main-thread accessor: nullptr outside the default process or when Init() fails, otherwise the shared instance.
+  static OSXSpeechSynthesizerService* GetInstance();
+  static already_AddRefed<OSXSpeechSynthesizerService> GetInstanceForService();  // AddRef'd result of GetInstance().
+
+ private:
+  OSXSpeechSynthesizerService();
+  virtual ~OSXSpeechSynthesizerService() = default;
+
+  bool RegisterVoices();  // NOTE(review): no definition is visible in the accompanying .mm hunk; confirm it is still needed.
+
+  bool mInitialized;  // false from the constructor; set to true at the end of a successful Init().
+  static mozilla::StaticRefPtr<OSXSpeechSynthesizerService> sSingleton;
+};
+
+}  // namespace dom
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
new file mode 100644
index 0000000000..1c0ecb7679
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@@ -0,0 +1,461 @@
+/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset:
+ * 2 -*- */
+/* vim: set ts=2 sw=2 et tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsServiceManagerUtils.h"
+#include "nsObjCExceptions.h"
+#include "nsCocoaUtils.h"
+#include "nsIThread.h"
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Assertions.h"
+#include "OSXSpeechSynthesizerService.h"
+
+#import <Cocoa/Cocoa.h>
+
+@class SpeechDelegate;
+
+// We can escape the default delimiters ("[[" and "]]") by temporarily
+// changing the delimiters just before they appear, and changing them back
+// just after.
+#define DLIM_ESCAPE_START "[[dlim (( ))]]"
+#define DLIM_ESCAPE_END "((dlim [[ ]]))"
+
+using namespace mozilla;
+
+// Bridges one nsISpeechTask to one NSSpeechSynthesizer: it receives
+// pause/resume/cancel/volume requests through nsISpeechTaskCallback and
+// forwards synthesizer progress (word, error, finished) back to the task.
+class SpeechTaskCallback final : public nsISpeechTaskCallback {
+ public:
+  SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
+                     const nsTArray<size_t>& aOffsets);
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback,
+                                           nsISpeechTaskCallback)
+
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  // Invoked by SpeechDelegate when the synthesizer reports progress.
+  void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);
+  void OnError(uint32_t aIndex);
+  void OnDidFinishSpeaking();
+
+ private:
+  virtual ~SpeechTaskCallback();
+
+  // Seconds elapsed since mStartingTime.
+  float GetTimeDurationFromStart();
+
+  nsCOMPtr<nsISpeechTask> mTask;
+  NSSpeechSynthesizer* mSpeechSynthesizer;
+  SpeechDelegate* mDelegate;
+  TimeStamp mStartingTime;
+  uint32_t mCurrentIndex;
+  nsTArray<size_t> mOffsets;
+};
+
+// Objective-C delegate that relays NSSpeechSynthesizer notifications to the
+// C++ SpeechTaskCallback it was created with. mCallback is a plain pointer.
+@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
+ @private
+  SpeechTaskCallback* mCallback;
+}
+
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
+@end
+
+@implementation SpeechDelegate
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
+  // Standard initializer form: adopt whatever the superclass initializer
+  // returns and touch ivars only when initialization succeeded.
+  self = [super init];
+  if (self) {
+    mCallback = aCallback;
+  }
+  return self;
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+            willSpeakWord:(NSRange)aRange
+                 ofString:(NSString*)aString {
+  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+        didFinishSpeaking:(BOOL)aFinishedSpeaking {
+  mCallback->OnDidFinishSpeaking();
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+    didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
+                    ofString:(NSString*)aString
+                     message:(NSString*)aMessage {
+  mCallback->OnError(aCharacterIndex);
+}
+@end
+
+NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)
+// Stores the task and synthesizer, clones the offset map, installs a SpeechDelegate on the synthesizer, and records the start time.
+SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask,
+                                       NSSpeechSynthesizer* aSynth,
+                                       const nsTArray<size_t>& aOffsets)
+    : mTask(aTask),
+      mSpeechSynthesizer(aSynth),
+      mCurrentIndex(0),
+      mOffsets(aOffsets.Clone()) {
+  mDelegate = [[SpeechDelegate alloc] initWithCallback:this];
+  [mSpeechSynthesizer setDelegate:mDelegate];
+  mStartingTime = TimeStamp::Now();
+}
+// Detaches the delegate first, then releases the delegate and the synthesizer.
+SpeechTaskCallback::~SpeechTaskCallback() {
+  [mSpeechSynthesizer setDelegate:nil];
+  [mDelegate release];
+  [mSpeechSynthesizer release];
+}
+// Stops speech; no event is dispatched to the task from here.
+NS_IMETHODIMP
+SpeechTaskCallback::OnCancel() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer stopSpeaking];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+// Pauses the synthesizer immediately, then reports the pause to the task if one is still attached.
+NS_IMETHODIMP
+SpeechTaskCallback::OnPause() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+  if (!mTask) {
+    // When calling pause() on child process, it may not receive end event
+    // from chrome process yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+// Resumes the synthesizer, then reports the resume to the task if one is still attached.
+NS_IMETHODIMP
+SpeechTaskCallback::OnResume() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer continueSpeaking];
+  if (!mTask) {
+    // When calling resume() on child process, it may not receive end event
+    // from chrome process yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// Applies a new output volume to the synthesizer.
+NS_IMETHODIMP
+SpeechTaskCallback::OnVolumeChanged(float aVolume) {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume]
+                    forProperty:NSSpeechVolumeProperty
+                          error:nil];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// Seconds elapsed since this callback was constructed.
+float SpeechTaskCallback::GetTimeDurationFromStart() {
+  TimeDuration duration = TimeStamp::Now() - mStartingTime;
+  return duration.ToSeconds();
+}
+
+// Maps the synthesizer's index (into the escaped text) back through mOffsets
+// and reports a "word" boundary. An out-of-range index keeps the previous
+// mCurrentIndex.
+void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
+  mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex;
+  if (!mTask) {
+    return;
+  }
+  mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(), mCurrentIndex,
+                          aLength, 1);
+}
+
+void SpeechTaskCallback::OnError(uint32_t aIndex) {
+  if (!mTask) {
+    return;
+  }
+  mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
+}
+
+// Reports the end of speech, then detaches the delegate and drops the task.
+void SpeechTaskCallback::OnDidFinishSpeaking() {
+  // Guard like the other callbacks do: mTask is cleared below, so a second
+  // notification must not dereference a null task.
+  if (mTask) {
+    mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
+  }
+  // no longer needed
+  [mSpeechSynthesizer setDelegate:nil];
+  mTask = nullptr;
+}
+
+namespace mozilla {
+namespace dom {
+
+// Plain description of one system voice as gathered by EnumVoicesRunnable.
+struct OSXVoice {
+  OSXVoice() : mIsDefault(false) {}
+
+  nsString mUri;
+  nsString mName;
+  nsString mLocale;
+  bool mIsDefault;
+};
+
+// Registers the voices in aList with the synth voice registry.
+class RegisterVoicesRunnable final : public Runnable {
+ public:
+  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
+                         nsTArray<OSXVoice>& aList)
+      : Runnable("RegisterVoicesRunnable"),
+        mSpeechService(aSpeechService),
+        mVoices(aList) {}
+
+  NS_IMETHOD Run() override;
+
+ private:
+  ~RegisterVoicesRunnable() override = default;
+
+  // This runnable is always dispatched synchronously, so it is unnecessary to
+  // hold a strong reference to the service or to copy the voice list.
+  OSXSpeechSynthesizerService* mSpeechService;
+  nsTArray<OSXVoice>& mVoices;
+};
+
+NS_IMETHODIMP
+RegisterVoicesRunnable::Run() {
+  nsresult rv;
+  nsCOMPtr<nsISynthVoiceRegistry> registry =
+      do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
+  if (!registry) {
+    return rv;
+  }
+
+  // Iterate by const reference: each OSXVoice carries three nsStrings, so a
+  // by-value loop variable would copy all of them on every iteration.
+  for (const OSXVoice& voice : mVoices) {
+    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName,
+                            voice.mLocale, true, false);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      continue;
+    }
+
+    if (voice.mIsDefault) {
+      registry->SetDefaultVoice(voice.mUri, true);
+    }
+  }
+
+  registry->NotifyVoicesChanged();
+
+  return NS_OK;
+}
+
+// Enumerates the system voices, then hands the list to a
+// RegisterVoicesRunnable on the main thread and waits for it to complete.
+class EnumVoicesRunnable final : public Runnable {
+ public:
+  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
+      : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}
+
+  NS_IMETHOD Run() override;
+
+ private:
+  ~EnumVoicesRunnable() override = default;
+
+  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
+};
+
+NS_IMETHODIMP
+EnumVoicesRunnable::Run() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  AutoTArray<OSXVoice, 64> list;
+
+  NSArray* voices = [NSSpeechSynthesizer availableVoices];
+  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
+
+  for (NSString* voice in voices) {
+    OSXVoice item;
+
+    NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];
+
+    nsAutoString identifier;
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier],
+                                       identifier);
+
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName],
+                                       item.mName);
+
+    nsCocoaUtils::GetStringForNSString(
+        [attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale);
+    item.mLocale.ReplaceChar('_', '-');
+
+    item.mUri.AssignLiteral("urn:moz-tts:osx:");
+    item.mUri.Append(identifier);
+
+    if ([voice isEqualToString:defaultVoice]) {
+      item.mIsDefault = true;
+    }
+
+    list.AppendElement(item);
+  }
+
+  RefPtr<RegisterVoicesRunnable> runnable =
+      new
RegisterVoicesRunnable(mSpeechService, list);
+  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
+                                           GetMainThreadSerialEventTarget(),
+                                           runnable.forget());
+
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+StaticRefPtr<OSXSpeechSynthesizerService>
+    OSXSpeechSynthesizerService::sSingleton;
+
+NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
+NS_IMPL_RELEASE(OSXSpeechSynthesizerService)
+
+OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
+    : mInitialized(false) {}
+// Starts voice enumeration on a new "SpeechWorker" thread. Returns false when the test pref is set, synthesis is disabled, or the thread cannot be created.
+bool OSXSpeechSynthesizerService::Init() {
+  if (Preferences::GetBool("media.webspeech.synth.test") ||
+      !StaticPrefs::media_webspeech_synth_enabled()) {
+    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
+    return false;
+  }
+
+  nsCOMPtr<nsIThread> thread;
+  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
+    return false;
+  }
+
+  // Get all the voices and register in the SynthVoiceRegistry
+  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
+  thread->Dispatch(runnable, NS_DISPATCH_NORMAL);  // NOTE(review): the Dispatch result is ignored, so Init() reports success even if queuing failed; confirm this is intended.
+
+  mInitialized = true;
+  return true;
+}
+// Configures a fresh NSSpeechSynthesizer for aUri/aRate/aVolume/aPitch, escapes "[[" and "]]" in aText, wires a SpeechTaskCallback to aTask, and starts speaking.
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Speak(const nsAString& aText,
+                                   const nsAString& aUri, float aVolume,
+                                   float aRate, float aPitch,
+                                   nsISpeechTask* aTask) {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
+             "OSXSpeechSynthesizerService doesn't allow this voice URI");
+
+  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
+  // strlen("urn:moz-tts:osx:") == 16
+  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
+  [synth setVoice:identifier];
+
+  // default rate is 180-220
+  [synth setObject:[NSNumber numberWithInt:aRate * 200]
+       forProperty:NSSpeechRateProperty
+             error:nil];
+  // volume allows 0.0-1.0
+  [synth setObject:[NSNumber numberWithFloat:aVolume]
+       forProperty:NSSpeechVolumeProperty
+             error:nil];
+  // Use default pitch value to calculate this
+  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty
+                                              error:nil];
+  if (defaultPitch) {
+    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
+    [synth setObject:[NSNumber numberWithInt:newPitch]
+         forProperty:NSSpeechPitchBaseProperty
+               error:nil];
+  }
+
+  nsAutoString escapedText;
+  // We need to map the offsets from the given text to the escaped text.
+  // The index of the offsets array is the position in the escaped text,
+  // the element value is the position in the user-supplied text.
+  nsTArray<size_t> offsets;
+  offsets.SetCapacity(aText.Length());
+
+  // This loop looks for occurrences of "[[" or "]]", escapes them, and
+  // populates the offsets array to supply a map to the original offsets.
+  for (size_t i = 0; i < aText.Length(); i++) {
+    if (aText.Length() > i + 1 && ((aText[i] == ']' && aText[i + 1] == ']') ||
+                                   (aText[i] == '[' && aText[i + 1] == '['))) {
+      escapedText.AppendLiteral(DLIM_ESCAPE_START);
+      offsets.AppendElements(strlen(DLIM_ESCAPE_START));  // NOTE(review): these filler entries are never assigned a source offset; confirm no word index can land inside the delimiters.
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+      escapedText.Append(aText[++i]);
+      offsets.AppendElement(i);
+      escapedText.AppendLiteral(DLIM_ESCAPE_END);
+      offsets.AppendElements(strlen(DLIM_ESCAPE_END));
+    } else {
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+    }
+  }
+
+  RefPtr<SpeechTaskCallback> callback =
+      new SpeechTaskCallback(aTask, synth, offsets);
+  nsresult rv = aTask->Setup(callback);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  NSString* text = nsCocoaUtils::ToNSString(escapedText);
+  BOOL success = [synth startSpeakingString:text];
+  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
+
+  aTask->DispatchStart();
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
+                                     const char16_t* aData) {
+  // Nothing to do for any observed topic.
+  return NS_OK;
+}
+
+// Returns the shared service, creating and initializing it on first use.
+// Must run on the main thread. Yields nullptr outside the default process
+// and whenever Init() declines to start the backend.
+OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
+  MOZ_ASSERT(NS_IsMainThread());
+  const bool inDefaultProcess =
+      XRE_GetProcessType() == GeckoProcessType_Default;
+  if (!inDefaultProcess) {
+    return nullptr;
+  }
+
+  if (sSingleton) {
+    return sSingleton;
+  }
+
+  RefPtr<OSXSpeechSynthesizerService> candidate =
+      new OSXSpeechSynthesizerService();
+  if (!candidate->Init()) {
+    // Nothing is cached; the next call will try again.
+    return nullptr;
+  }
+
+  sSingleton = candidate;
+  ClearOnShutdown(&sSingleton);
+  return sSingleton;
+}
+
+// Same as GetInstance(), but hands back an owning reference.
+already_AddRefed<OSXSpeechSynthesizerService>
+OSXSpeechSynthesizerService::GetInstanceForService() {
+  RefPtr<OSXSpeechSynthesizerService> owned(GetInstance());
+  return owned.forget();
+}
+
+}  // namespace dom
+}  // namespace mozilla
diff --git a/dom/media/webspeech/synth/cocoa/components.conf b/dom/media/webspeech/synth/cocoa/components.conf
new file mode 100644
index 0000000000..c9b0fa5ef0
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/components.conf
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Classes = [
+    {
+        'cid': '{914e73b4-6337-4bef-97f3-4d069e053a12}',
+        'contract_ids': ['@mozilla.org/synthsystem;1'],
+        'singleton': True,
+        'type': 'mozilla::dom::OSXSpeechSynthesizerService',
+        'headers': ['/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h'],
+        'constructor': 'mozilla::dom::OSXSpeechSynthesizerService::GetInstanceForService',
+        'categories': {"speech-synth-started": 'OSX Speech Synth'},
+    },
+]
diff --git a/dom/media/webspeech/synth/cocoa/moz.build b/dom/media/webspeech/synth/cocoa/moz.build
new file mode 100644
index 0000000000..4d59f7a389
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/moz.build
@@ -0,0 +1,15 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+    "OSXSpeechSynthesizerService.mm",
+]
+
+XPCOM_MANIFESTS += [
+    "components.conf",
+]
+
+FINAL_LIBRARY = "xul"
diff --git a/dom/media/webspeech/synth/crashtests/1230428.html b/dom/media/webspeech/synth/crashtests/1230428.html
new file mode 100644
index 0000000000..40fa000710
--- /dev/null
+++ b/dom/media/webspeech/synth/crashtests/1230428.html
@@ -0,0 +1,32 @@
+<!DOCTYPE html>
+<html class="reftest-wait">
+<head>
+<meta charset="utf-8">
+<script type="application/javascript">
+function f()
+{
+  if (speechSynthesis.getVoices().length == 0) {
+    // No synthesis backend to test this
+    document.documentElement.removeAttribute('class');
+    return;
+  }
+  // Speak, then immediately cancel/pause/resume; the test ends when either onerror or onend clears the class attribute.
+  var s = new SpeechSynthesisUtterance("hello world");
+  s.onerror = () => {
+    // No synthesis backend to test this
+    document.documentElement.removeAttribute('class');
+    return;
+  }
+  s.onend = () => {
+    document.documentElement.removeAttribute('class');
+  };
+  speechSynthesis.speak(s);
+  speechSynthesis.cancel();
+  speechSynthesis.pause();
+  speechSynthesis.resume();
+}
+  </script>
+</head>
+<body onload="f();">
+</body>
+</html>
diff --git a/dom/media/webspeech/synth/crashtests/crashtests.list b/dom/media/webspeech/synth/crashtests/crashtests.list
new file mode 100644
index 0000000000..07e931c929
--- /dev/null
+++ b/dom/media/webspeech/synth/crashtests/crashtests.list
@@ -0,0 +1 @@
+skip-if(!cocoaWidget) pref(media.webspeech.synth.enabled,true) load 1230428.html # bug 1230428
diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
new file mode 100644
index 0000000000..716b5f9af2
--- /dev/null
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
@@ -0,0 +1,51 @@
+/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 40 -*- */
+/* vim: set ts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+include protocol PContent;
+include protocol PSpeechSynthesisRequest;
+
+namespace mozilla {
+namespace dom {
+// Description of one voice as sent to the child side.
+struct RemoteVoice {
+  nsString voiceURI;
+  nsString name;
+  nsString lang;
+  bool localService;
+  bool queued;
+};
+
+sync protocol PSpeechSynthesis
+{
+  manager PContent;
+  manages PSpeechSynthesisRequest;
+
+child:
+
+  async VoiceAdded(RemoteVoice aVoice);
+
+  async VoiceRemoved(nsString aUri);
+
+  async SetDefaultVoice(nsString aUri, bool aIsDefault);
+
+  async IsSpeakingChanged(bool aIsSpeaking);
+
+  async NotifyVoicesChanged();
+
+  async NotifyVoicesError(nsString aError);
+
+  async InitialVoicesAndState(RemoteVoice[] aVoices, nsString[] aDefaults,
+                              bool aIsSpeaking);
+
+parent:
+  async __delete__();
+  // NOTE(review): the C++ Alloc/Recv handlers name the 2nd and 3rd arguments aLang, aUri (the reverse of the names below); confirm the order callers actually send.
+  async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang,
+                                float aVolume, float aRate, float aPitch, bool aShouldResistFingerprinting);
+};
+
+} // namespace dom
+} // namespace mozilla
diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl
new file mode 100644
index 0000000000..8543eebc5b
--- /dev/null
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl
@@ -0,0 +1,48 @@
+/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 40 -*- */
+/* vim: set ts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+include protocol PSpeechSynthesis;
+
+namespace mozilla {
+namespace dom {
+// One actor per utterance: control messages go to the parent, progress events come back to the child.
+[ManualDealloc, ChildImpl=virtual, ParentImpl=virtual]
+async protocol PSpeechSynthesisRequest
+{
+  manager PSpeechSynthesis;
+
+ parent:
+
+  async __delete__();
+
+  async Pause();
+
+  async Resume();
+
+  async Cancel();
+
+  async ForceEnd();
+
+  async SetAudioOutputVolume(float aVolume);
+
+ child:
+
+  async OnEnd(bool aIsError, float aElapsedTime, uint32_t aCharIndex);
+
+  async OnStart(nsString aUri);
+
+  async OnPause(float aElapsedTime, uint32_t aCharIndex);
+
+  async OnResume(float aElapsedTime, uint32_t aCharIndex);
+
+  async OnBoundary(nsString aName, float aElapsedTime, uint32_t aCharIndex,
+                   uint32_t aCharLength, uint8_t argc);
+
+  async OnMark(nsString aName, float aElapsedTime, uint32_t aCharIndex);
+};
+
+} // namespace dom
+} // namespace mozilla
diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
new file mode 100644
index 0000000000..ff28d0c418
--- /dev/null
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -0,0 +1,175 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#include "SpeechSynthesisChild.h"
+#include "nsSynthVoiceRegistry.h"
+
+namespace mozilla::dom {
+
+SpeechSynthesisChild::SpeechSynthesisChild() {
+  MOZ_COUNT_CTOR(SpeechSynthesisChild);
+}
+
+SpeechSynthesisChild::~SpeechSynthesisChild() {
+  MOZ_COUNT_DTOR(SpeechSynthesisChild);
+}
+// The Recv* handlers below forward each voice/state message to the matching static nsSynthVoiceRegistry::Recv* function.
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvInitialVoicesAndState(
+    nsTArray<RemoteVoice>&& aVoices, nsTArray<nsString>&& aDefaults,
+    const bool& aIsSpeaking) {
+  nsSynthVoiceRegistry::RecvInitialVoicesAndState(aVoices, aDefaults,
+                                                  aIsSpeaking);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvVoiceAdded(
+    const RemoteVoice& aVoice) {
+  nsSynthVoiceRegistry::RecvAddVoice(aVoice);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvVoiceRemoved(
+    const nsAString& aUri) {
+  nsSynthVoiceRegistry::RecvRemoveVoice(aUri);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvSetDefaultVoice(
+    const nsAString& aUri, const bool& aIsDefault) {
+  nsSynthVoiceRegistry::RecvSetDefaultVoice(aUri, aIsDefault);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvIsSpeakingChanged(
+    const bool& aIsSpeaking) {
+  nsSynthVoiceRegistry::RecvIsSpeakingChanged(aIsSpeaking);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvNotifyVoicesChanged() {
+  nsSynthVoiceRegistry::RecvNotifyVoicesChanged();
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisChild::RecvNotifyVoicesError(
+    const nsAString& aError) {
+  nsSynthVoiceRegistry::RecvNotifyVoicesError(aError);
+  return IPC_OK();
+}
+// Never expected to run: request actors are constructed by the caller, so reaching this crashes deliberately.
+PSpeechSynthesisRequestChild*
+SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(
+    const nsAString& aText, const nsAString& aLang, const nsAString& aUri,
+    const float& aVolume, const float& aRate, const float& aPitch,
+    const bool& aShouldResistFingerprinting) {
+  MOZ_CRASH("Caller is supposed to manually construct a request!");
+}
+
+bool SpeechSynthesisChild::DeallocPSpeechSynthesisRequestChild(
+    PSpeechSynthesisRequestChild* aActor) {
+  delete aActor;
+  return true;
+}
+
+// SpeechSynthesisRequestChild
+// Links the actor and its task in both directions: the actor keeps mTask, and the task's mActor points back here.
+SpeechSynthesisRequestChild::SpeechSynthesisRequestChild(SpeechTaskChild* aTask)
+    : mTask(aTask) {
+  mTask->mActor = this;
+  MOZ_COUNT_CTOR(SpeechSynthesisRequestChild);
+}
+
+SpeechSynthesisRequestChild::~SpeechSynthesisRequestChild() {
+  MOZ_COUNT_DTOR(SpeechSynthesisRequestChild);
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnStart(
+    const nsAString& aUri) {
+  mTask->DispatchStartImpl(aUri);
+  return IPC_OK();
+}
+// Final message for a request: clears the task's actor pointer before dispatching end/error, then deletes the actor via Send__delete__.
+mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnEnd(
+    const bool& aIsError, const float& aElapsedTime,
+    const uint32_t& aCharIndex) {
+  SpeechSynthesisRequestChild* actor = mTask->mActor;
+  mTask->mActor = nullptr;
+
+  if (aIsError) {
+    mTask->DispatchErrorImpl(aElapsedTime, aCharIndex);
+  } else {
+    mTask->DispatchEndImpl(aElapsedTime, aCharIndex);
+  }
+
+  SpeechSynthesisRequestChild::Send__delete__(actor);
+
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnPause(
+    const float& aElapsedTime, const uint32_t& aCharIndex) {
+  mTask->DispatchPauseImpl(aElapsedTime, aCharIndex);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnResume(
+    const float& aElapsedTime, const uint32_t& aCharIndex) {
+  mTask->DispatchResumeImpl(aElapsedTime, aCharIndex);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnBoundary(
+    const nsAString& aName, const float& aElapsedTime,
+    const uint32_t& aCharIndex, const uint32_t& aCharLength,
+    const uint8_t& argc) {
+  mTask->DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength,
+                              argc);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnMark(
+    const nsAString& aName, const float& aElapsedTime,
+    const uint32_t& aCharIndex) {
+  mTask->DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
+  return IPC_OK();
+}
+
+// SpeechTaskChild
+// Child-side task: control requests are forwarded to the parent through mActor, which starts out null.
+SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance,
+                                 bool aShouldResistFingerprinting)
+    : nsSpeechTask(aUtterance, aShouldResistFingerprinting), mActor(nullptr) {}
+
+NS_IMETHODIMP
+SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback) {
+  MOZ_CRASH("Should never be called from child");
+}
+// NOTE(review): Pause/Resume/Cancel/ForceEnd only assert that mActor is set, while RecvOnEnd clears it; confirm none can be called after the end event.
+void SpeechTaskChild::Pause() {
+  MOZ_ASSERT(mActor);
+  mActor->SendPause();
+}
+
+void SpeechTaskChild::Resume() {
+  MOZ_ASSERT(mActor);
+  mActor->SendResume();
+}
+
+void SpeechTaskChild::Cancel() {
+  MOZ_ASSERT(mActor);
+  mActor->SendCancel();
+}
+
+void SpeechTaskChild::ForceEnd() {
+  MOZ_ASSERT(mActor);
+  mActor->SendForceEnd();
+}
+// Unlike the methods above, this tolerates a missing actor and is then a no-op.
+void SpeechTaskChild::SetAudioOutputVolume(float aVolume) {
+  if (mActor) {
+    mActor->SendSetAudioOutputVolume(aVolume);
+  }
+}
+
+}  // namespace mozilla::dom
diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
new file mode 100644
index 0000000000..da2887f2da
--- /dev/null
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -0,0 +1,111 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef mozilla_dom_SpeechSynthesisChild_h +#define mozilla_dom_SpeechSynthesisChild_h + +#include "mozilla/Attributes.h" +#include "mozilla/dom/PSpeechSynthesisChild.h" +#include "mozilla/dom/PSpeechSynthesisRequestChild.h" +#include "nsSpeechTask.h" + +namespace mozilla::dom { + +class nsSynthVoiceRegistry; +class SpeechSynthesisRequestChild; +class SpeechTaskChild; + /* Actor derived from PSpeechSynthesisChild. Declares the Recv* handlers for the voice-list and speaking-state messages; the constructor is protected and nsSynthVoiceRegistry is a friend. */ +class SpeechSynthesisChild : public PSpeechSynthesisChild { + friend class nsSynthVoiceRegistry; + friend class PSpeechSynthesisChild; + + public: + NS_INLINE_DECL_REFCOUNTING(SpeechSynthesisChild, override) + + mozilla::ipc::IPCResult RecvInitialVoicesAndState( + nsTArray<RemoteVoice>&& aVoices, nsTArray<nsString>&& aDefaults, + const bool& aIsSpeaking); + + mozilla::ipc::IPCResult RecvVoiceAdded(const RemoteVoice& aVoice); + + mozilla::ipc::IPCResult RecvVoiceRemoved(const nsAString& aUri); + + mozilla::ipc::IPCResult RecvSetDefaultVoice(const nsAString& aUri, + const bool& aIsDefault); + + mozilla::ipc::IPCResult RecvIsSpeakingChanged(const bool& aIsSpeaking); + + mozilla::ipc::IPCResult RecvNotifyVoicesChanged(); + + mozilla::ipc::IPCResult RecvNotifyVoicesError(const nsAString& aError); + + protected: + SpeechSynthesisChild(); + virtual ~SpeechSynthesisChild(); + /* NOTE(review): the parameter names below are ordered (aLang, aUri, aText) and (aPitch, aRate), while SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent names the same argument positions (aText, aLang, aUri) and (aRate, aPitch) - confirm against the IPDL declaration. */ + PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild( + const nsAString& aLang, const nsAString& aUri, const nsAString& aText, + const float& aVolume, const float& aPitch, const float& aRate, + const bool& aShouldResistFingerprinting); + bool DeallocPSpeechSynthesisRequestChild( + PSpeechSynthesisRequestChild* aActor); +}; + /* Actor derived from PSpeechSynthesisRequestChild. Declares the RecvOn* handlers for speech events and holds a strong reference (mTask) to a SpeechTaskChild. */ +class SpeechSynthesisRequestChild : public PSpeechSynthesisRequestChild { + public: + explicit SpeechSynthesisRequestChild(SpeechTaskChild* aTask); + virtual ~SpeechSynthesisRequestChild(); + + protected: + mozilla::ipc::IPCResult RecvOnStart(const nsAString& aUri) override; + + mozilla::ipc::IPCResult RecvOnEnd(const bool& aIsError, + const float& aElapsedTime, + const uint32_t& aCharIndex) 
override; + + mozilla::ipc::IPCResult RecvOnPause(const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + mozilla::ipc::IPCResult RecvOnResume(const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + mozilla::ipc::IPCResult RecvOnBoundary(const nsAString& aName, + const float& aElapsedTime, + const uint32_t& aCharIndex, + const uint32_t& aCharLength, + const uint8_t& argc) override; + + mozilla::ipc::IPCResult RecvOnMark(const nsAString& aName, + const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + RefPtr<SpeechTaskChild> mTask; +}; + /* nsSpeechTask subclass that overrides Setup, Pause, Resume, Cancel, ForceEnd and SetAudioOutputVolume. mActor is a raw pointer to a SpeechSynthesisRequestChild, which is declared a friend. */ +class SpeechTaskChild : public nsSpeechTask { + friend class SpeechSynthesisRequestChild; + + public: + explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting); + + NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback) override; + + void Pause() override; + + void Resume() override; + + void Cancel() override; + + void ForceEnd() override; + + void SetAudioOutputVolume(float aVolume) override; + + private: + SpeechSynthesisRequestChild* mActor; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp new file mode 100644 index 0000000000..a9eb53c5b7 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp @@ -0,0 +1,221 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechSynthesisParent.h" +#include "nsSynthVoiceRegistry.h" + +namespace mozilla::dom { + +SpeechSynthesisParent::SpeechSynthesisParent() { + MOZ_COUNT_CTOR(SpeechSynthesisParent); +} + +SpeechSynthesisParent::~SpeechSynthesisParent() { + MOZ_COUNT_DTOR(SpeechSynthesisParent); +} + +void SpeechSynthesisParent::ActorDestroy(ActorDestroyReason aWhy) { + // Implement me! 
Bug 1005141 +} + /* Asks the voice registry to send its initial voices and state to this actor and returns the result of that call. */ +bool SpeechSynthesisParent::SendInit() { + return nsSynthVoiceRegistry::GetInstance()->SendInitialVoicesAndState(this); +} + /* Builds a SpeechTaskParent from aVolume, aText and aShouldResistFingerprinting and wraps it in a new request actor. aLang, aUri, aRate and aPitch are not read here. */ +PSpeechSynthesisRequestParent* +SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent( + const nsAString& aText, const nsAString& aLang, const nsAString& aUri, + const float& aVolume, const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) { + RefPtr<SpeechTaskParent> task = + new SpeechTaskParent(aVolume, aText, aShouldResistFingerprinting); + SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task); + return actor; +} + /* Deletes aActor and reports success. */ +bool SpeechSynthesisParent::DeallocPSpeechSynthesisRequestParent( + PSpeechSynthesisRequestParent* aActor) { + delete aActor; + return true; +} + /* Hands the request parameters, together with the task held by the actor, to nsSynthVoiceRegistry::Speak. */ +mozilla::ipc::IPCResult +SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor( + PSpeechSynthesisRequestParent* aActor, const nsAString& aText, + const nsAString& aLang, const nsAString& aUri, const float& aVolume, + const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) { + MOZ_ASSERT(aActor); + SpeechSynthesisRequestParent* actor = + static_cast<SpeechSynthesisRequestParent*>(aActor); + nsSynthVoiceRegistry::GetInstance()->Speak(aText, aLang, aUri, aVolume, aRate, + aPitch, actor->mTask); + return IPC_OK(); +} + +// SpeechSynthesisRequestParent + /* Stores aTask in mTask and points the mActor member of the task back at this actor. aTask is dereferenced without a null check. */ +SpeechSynthesisRequestParent::SpeechSynthesisRequestParent( + SpeechTaskParent* aTask) + : mTask(aTask) { + mTask->mActor = this; + MOZ_COUNT_CTOR(SpeechSynthesisRequestParent); +} + +SpeechSynthesisRequestParent::~SpeechSynthesisRequestParent() { + if (mTask) { + mTask->mActor = nullptr; + // If we still have a task, cancel it. + mTask->Cancel(); + } + MOZ_COUNT_DTOR(SpeechSynthesisRequestParent); +} + +void SpeechSynthesisRequestParent::ActorDestroy(ActorDestroyReason aWhy) { + // Implement me! 
Bug 1005141 +} + /* RecvPause asserts that a task is attached and forwards to it; RecvResume, RecvCancel, RecvForceEnd and RecvSetAudioOutputVolume below have the same shape. */ +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvPause() { + MOZ_ASSERT(mTask); + mTask->Pause(); + return IPC_OK(); +} + /* Breaks the link between task and actor in both directions; with mTask cleared, the destructor skips its Cancel() call. */ +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::Recv__delete__() { + MOZ_ASSERT(mTask); + mTask->mActor = nullptr; + mTask = nullptr; + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvResume() { + MOZ_ASSERT(mTask); + mTask->Resume(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvCancel() { + MOZ_ASSERT(mTask); + mTask->Cancel(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvForceEnd() { + MOZ_ASSERT(mTask); + mTask->ForceEnd(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvSetAudioOutputVolume( + const float& aVolume) { + MOZ_ASSERT(mTask); + mTask->SetAudioOutputVolume(aVolume); + return IPC_OK(); +} + +// SpeechTaskParent + /* Each Dispatch*Impl override below returns NS_OK without sending when mActor is null; otherwise it forwards the event through the matching mActor->SendOn* call and maps a failed send to NS_ERROR_FAILURE. */ +nsresult SpeechTaskParent::DispatchStartImpl(const nsAString& aUri) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnStart(aUri)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchEndImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnEnd(false, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchPauseImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnPause(aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchResumeImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. 
+ return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnResume(aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + /* Errors reuse the OnEnd message with its first argument set to true (DispatchEndImpl passes false). */ +nsresult SpeechTaskParent::DispatchErrorImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnEnd(true, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex, + uint32_t aCharLength, + uint8_t argc) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnBoundary(aName, aElapsedTime, aCharIndex, + aCharLength, argc)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnMark(aName, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h new file mode 100644 index 0000000000..d2c033ea30 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h @@ -0,0 +1,104 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesisParent_h +#define mozilla_dom_SpeechSynthesisParent_h + +#include "mozilla/dom/PSpeechSynthesisParent.h" +#include "mozilla/dom/PSpeechSynthesisRequestParent.h" +#include "nsSpeechTask.h" + +namespace mozilla::dom { + +class ContentParent; +class SpeechTaskParent; +class SpeechSynthesisRequestParent; + /* Actor derived from PSpeechSynthesisParent. The constructor and the request Alloc/Dealloc/Recv hooks are protected; ContentParent, SpeechSynthesisRequestParent and PSpeechSynthesisParent are friends. */ +class SpeechSynthesisParent : public PSpeechSynthesisParent { + friend class ContentParent; + friend class SpeechSynthesisRequestParent; + friend class PSpeechSynthesisParent; + + public: + NS_INLINE_DECL_REFCOUNTING(SpeechSynthesisParent, override) + + void ActorDestroy(ActorDestroyReason aWhy) override; + + bool SendInit(); + + protected: + SpeechSynthesisParent(); + virtual ~SpeechSynthesisParent(); + PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent( + const nsAString& aText, const nsAString& aLang, const nsAString& aUri, + const float& aVolume, const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting); + + bool DeallocPSpeechSynthesisRequestParent( + PSpeechSynthesisRequestParent* aActor); + + mozilla::ipc::IPCResult RecvPSpeechSynthesisRequestConstructor( + PSpeechSynthesisRequestParent* aActor, const nsAString& aText, + const nsAString& aLang, const nsAString& aUri, const float& aVolume, + const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) override; +}; + /* Actor derived from PSpeechSynthesisRequestParent. mTask is a public strong reference to the SpeechTaskParent; the Recv* overrides handle pause, resume, cancel, force-end, volume and __delete__ messages. */ +class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent { + public: + explicit SpeechSynthesisRequestParent(SpeechTaskParent* aTask); + virtual ~SpeechSynthesisRequestParent(); + + RefPtr<SpeechTaskParent> mTask; + + protected: + void ActorDestroy(ActorDestroyReason aWhy) override; + + mozilla::ipc::IPCResult RecvPause() override; + + mozilla::ipc::IPCResult RecvResume() override; + + mozilla::ipc::IPCResult RecvCancel() override; + + mozilla::ipc::IPCResult RecvForceEnd() override; + + mozilla::ipc::IPCResult RecvSetAudioOutputVolume( + const float& aVolume) override; + + 
mozilla::ipc::IPCResult Recv__delete__() override; +}; + /* nsSpeechTask subclass built from a volume and text rather than an utterance object; it overrides every Dispatch*Impl hook. */ +class SpeechTaskParent : public nsSpeechTask { + friend class SpeechSynthesisRequestParent; + + public: + SpeechTaskParent(float aVolume, const nsAString& aUtterance, + bool aShouldResistFingerprinting) + : nsSpeechTask(aVolume, aUtterance, aShouldResistFingerprinting), + mActor(nullptr) {} + + nsresult DispatchStartImpl(const nsAString& aUri) override; + + nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchBoundaryImpl(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex, uint32_t aCharLength, + uint8_t argc) override; + + nsresult DispatchMarkImpl(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex) override; + /* mActor is a raw pointer, null after construction; SpeechSynthesisRequestParent is a friend and may write to it. */ + private: + SpeechSynthesisRequestParent* mActor; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/moz.build b/dom/media/webspeech/synth/moz.build new file mode 100644 index 0000000000..2cf19982b2 --- /dev/null +++ b/dom/media/webspeech/synth/moz.build @@ -0,0 +1,65 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +if CONFIG["MOZ_WEBSPEECH"]: + MOCHITEST_MANIFESTS += [ + "test/mochitest.toml", + "test/startup/mochitest.toml", + ] + + XPIDL_MODULE = "dom_webspeechsynth" + + XPIDL_SOURCES += ["nsISpeechService.idl", "nsISynthVoiceRegistry.idl"] + + EXPORTS.mozilla.dom += [ + "ipc/SpeechSynthesisChild.h", + "ipc/SpeechSynthesisParent.h", + "nsSpeechTask.h", + "nsSynthVoiceRegistry.h", + "SpeechSynthesis.h", + "SpeechSynthesisUtterance.h", + "SpeechSynthesisVoice.h", + ] + + UNIFIED_SOURCES += [ + "ipc/SpeechSynthesisChild.cpp", + "ipc/SpeechSynthesisParent.cpp", + "nsSpeechTask.cpp", + "nsSynthVoiceRegistry.cpp", + "SpeechSynthesis.cpp", + "SpeechSynthesisUtterance.cpp", + "SpeechSynthesisVoice.cpp", + ] + + if CONFIG["MOZ_WEBSPEECH_TEST_BACKEND"]: + UNIFIED_SOURCES += ["test/nsFakeSynthServices.cpp"] + + XPCOM_MANIFESTS += [ + "test/components.conf", + ] + + if CONFIG["MOZ_WIDGET_TOOLKIT"] == "windows": + DIRS += ["windows"] + + if CONFIG["MOZ_WIDGET_TOOLKIT"] == "cocoa": + DIRS += ["cocoa"] + + if CONFIG["MOZ_WIDGET_TOOLKIT"] == "android": + DIRS += ["android"] + + if CONFIG["MOZ_SYNTH_SPEECHD"]: + DIRS += ["speechd"] + + IPDL_SOURCES += [ + "ipc/PSpeechSynthesis.ipdl", + "ipc/PSpeechSynthesisRequest.ipdl", + ] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" +LOCAL_INCLUDES += [ + "ipc", +] diff --git a/dom/media/webspeech/synth/nsISpeechService.idl b/dom/media/webspeech/synth/nsISpeechService.idl new file mode 100644 index 0000000000..b69973b6d2 --- /dev/null +++ b/dom/media/webspeech/synth/nsISpeechService.idl @@ -0,0 +1,143 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * A callback is implemented by the service. 
+ */ +[scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)] +interface nsISpeechTaskCallback : nsISupports +{ + /** + * The user or application has paused the speech. + */ + void onPause(); + + /** + * The user or application has resumed the speech. + */ + void onResume(); + + /** + * The user or application has canceled the speech. + */ + void onCancel(); + + /** + * The user or application has changed the volume of this speech. + */ + void onVolumeChanged(in float aVolume); +}; + + +/** + * A task is associated with a single utterance. It is provided by the browser + * to the service in the speak() method. + */ +[scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)] +interface nsISpeechTask : nsISupports +{ + /** + * Prepare browser for speech. + * + * @param aCallback callback object for mid-speech operations. + */ + void setup(in nsISpeechTaskCallback aCallback); + + /** + * Dispatch start event. + */ + void dispatchStart(); + + /** + * Dispatch end event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchEnd(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch pause event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchPause(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch resume event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchResume(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch error event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchError(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch boundary event. 
+ * + * @param aName name of boundary, 'word' or 'sentence' + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + * @param aCharLength length of text in boundary event to be spoken. + */ + [optional_argc] void dispatchBoundary(in AString aName, in float aElapsedTime, + in unsigned long aCharIndex, + [optional] in unsigned long aCharLength); + + /** + * Dispatch mark event. + * + * @param aName mark identifier. + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchMark(in AString aName, in float aElapsedTime, in unsigned long aCharIndex); +}; + +/** + * The main interface of a speech synthesis service. + * + * A service is responsible for outputting audio. + * The service dispatches events, starting with dispatchStart() and ending with + * dispatchEnd() or dispatchError(). + * A service must also respond with the correct actions and events in response + * to implemented callback methods. + */ +[scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)] +interface nsISpeechService : nsISupports +{ + /** + * Speak the given text using the voice identified by the given uri. See + * W3C Speech API spec for information about pitch and rate. + * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes + * + * @param aText text to utter. + * @param aUri unique voice identifier. + * @param aVolume volume to speak voice in. Only relevant for indirect audio. + * @param aRate rate to speak voice in. + * @param aPitch pitch to speak voice in. + * @param aTask task instance for utterance, used for sending events or audio + * data back to browser. + */ + void speak(in AString aText, in AString aUri, + in float aVolume, in float aRate, in float aPitch, + in nsISpeechTask aTask); +}; + +%{C++ +// This is the service category speech services could use to start up as +// a component. 
+#define NS_SPEECH_SYNTH_STARTED "speech-synth-started" +%} diff --git a/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl b/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl new file mode 100644 index 0000000000..1898bf68c1 --- /dev/null +++ b/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl @@ -0,0 +1,82 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsISpeechService; + +[scriptable, builtinclass, uuid(5d7a0b38-77e5-4ee5-897c-ce5db9b85d44)] +interface nsISynthVoiceRegistry : nsISupports +{ + /** + * Register a speech synthesis voice. + * + * @param aService the service that provides this voice. + * @param aUri a unique identifier for this voice. + * @param aName human-readable name for this voice. + * @param aLang a BCP 47 language tag. + * @param aLocalService true if service does not require network. + * @param aQueuesUtterances true if voice only speaks one utterance at a time + */ + void addVoice(in nsISpeechService aService, in AString aUri, + in AString aName, in AString aLang, + in boolean aLocalService, in boolean aQueuesUtterances); + + /** + * Remove a speech synthesis voice. + * + * @param aService the service that was used to add the voice. + * @param aUri a unique identifier of an existing voice. + */ + void removeVoice(in nsISpeechService aService, in AString aUri); + + /** + * Notify content of voice availability changes. This allows content + * to be notified of voice catalog changes in real time. + */ + void notifyVoicesChanged(); + + /** + * Notify chrome code of an error when starting speech synthesis service + */ + void notifyVoicesError(in AString aError); + + /** + * Set a voice as default. + * + * @param aUri a unique identifier of an existing voice. 
+ * @param aIsDefault true if this voice should be toggled as default. + */ + void setDefaultVoice(in AString aUri, in boolean aIsDefault); + /** NOTE(review): the attribute and methods below carry no documentation in this file. Presumably aUri is the same unique voice identifier that addVoice takes and aIndex ranges over voiceCount - confirm against the implementation. */ + readonly attribute uint32_t voiceCount; + + AString getVoice(in uint32_t aIndex); + + bool isDefaultVoice(in AString aUri); + + bool isLocalVoice(in AString aUri); + + AString getVoiceLang(in AString aUri); + + AString getVoiceName(in AString aUri); +}; + +%{C++ +#define NS_SYNTHVOICEREGISTRY_CID \ + { /* {7090524d-5574-4492-a77f-d8d558ced59d} */ \ + 0x7090524d, \ + 0x5574, \ + 0x4492, \ + { 0xa7, 0x7f, 0xd8, 0xd5, 0x58, 0xce, 0xd5, 0x9d } \ + } + +#define NS_SYNTHVOICEREGISTRY_CONTRACTID \ + "@mozilla.org/synth-voice-registry;1" + +#define NS_SYNTHVOICEREGISTRY_CLASSNAME \ + "Speech Synthesis Voice Registry" + +%} diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp new file mode 100644 index 0000000000..b102172466 --- /dev/null +++ b/dom/media/webspeech/synth/nsSpeechTask.cpp @@ -0,0 +1,389 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "AudioChannelService.h" +#include "AudioSegment.h" +#include "nsSpeechTask.h" +#include "nsSynthVoiceRegistry.h" +#include "nsXULAppAPI.h" +#include "SharedBuffer.h" +#include "SpeechSynthesis.h" + +#undef LOG +extern mozilla::LogModule* GetSpeechSynthLog(); +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +#define AUDIO_TRACK 1 + +namespace mozilla::dom { + +// nsSpeechTask + +NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance, + mCallback) + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask) + NS_INTERFACE_MAP_ENTRY(nsISpeechTask) + NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) + +// Utterance-backed task: the text and volume are copied from aUtterance, +// which is dereferenced without a null check. +nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting) + : mUtterance(aUtterance), + mInited(false), + mPrePaused(false), + mPreCanceled(false), + mCallback(nullptr), + mShouldResistFingerprinting(aShouldResistFingerprinting), + mState(STATE_PENDING) { + mText = aUtterance->mText; + mVolume = aUtterance->Volume(); +} + +// Task without an utterance (mUtterance stays null); the text and volume are +// supplied directly by the caller. +nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, + bool aShouldResistFingerprinting) + : mUtterance(nullptr), + mVolume(aVolume), + mText(aText), + mInited(false), + mPrePaused(false), + mPreCanceled(false), + mCallback(nullptr), + mShouldResistFingerprinting(aShouldResistFingerprinting), + mState(STATE_PENDING) {} + +nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); } + +void nsSpeechTask::Init() { mInited = true; } + +void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) { + mChosenVoiceURI = aUri; +} + +NS_IMETHODIMP +nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) { + MOZ_ASSERT(XRE_IsParentProcess()); + + LOG(LogLevel::Debug, ("nsSpeechTask::Setup")); + + mCallback = aCallback; 
+ + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchStart() { + nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true); + return DispatchStartImpl(); +} + +nsresult nsSpeechTask::DispatchStartImpl() { + return DispatchStartImpl(mChosenVoiceURI); +} + +// Moves the task from PENDING to SPEAKING, records the chosen voice on the +// utterance and fires its start event. Any other starting state is rejected. +nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl")); + + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState != STATE_PENDING)) { + return NS_ERROR_NOT_AVAILABLE; + } + + CreateAudioChannelAgent(); + + mState = STATE_SPEAKING; + mUtterance->mChosenVoiceURI = aUri; + mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) { + // After we end, no callback functions should go through. + mCallback = nullptr; + + if (!mPreCanceled) { + nsSynthVoiceRegistry::GetInstance()->SpeakNext(); + } + + return DispatchEndImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl")); + + DestroyAudioChannelAgent(); + + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState == STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + // Local strong reference: the end event is dispatched through it after + // OnEnd() has been called. + RefPtr<SpeechSynthesisUtterance> utterance = mUtterance; + + if (mSpeechSynthesis) { + mSpeechSynthesis->OnEnd(this); + } + + mState = STATE_ENDED; + utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) { + return DispatchPauseImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl")); + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mUtterance->mPaused)) { + return NS_ERROR_NOT_AVAILABLE; + } + if (NS_WARN_IF(mState == 
STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->mPaused = true; + if (mState == STATE_SPEAKING) { + mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) { + return DispatchResumeImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl")); + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(!(mUtterance->mPaused))) { + return NS_ERROR_NOT_AVAILABLE; + } + if (NS_WARN_IF(mState == STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->mPaused = false; + if (mState == STATE_SPEAKING) { + mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + } + + return NS_OK; +} + +void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) { + DispatchError(aElapsedTime, aCharIndex); +} + +// NOTE(review): unlike DispatchEnd, this path does not clear mCallback, so +// callbacks can still be invoked after an error - confirm that is intended. +NS_IMETHODIMP +nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) { + if (!mPreCanceled) { + nsSynthVoiceRegistry::GetInstance()->SpeakNext(); + } + + return DispatchErrorImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl")); + + DestroyAudioChannelAgent(); + + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState == STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + // Take a local strong reference before calling OnEnd(), exactly as + // DispatchEndImpl does, and dispatch the event through it. + RefPtr<SpeechSynthesisUtterance> utterance = mUtterance; + + if (mSpeechSynthesis) { + mSpeechSynthesis->OnEnd(this); + } + + mState = STATE_ENDED; + utterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex, uint32_t aCharLength, + uint8_t argc) { + return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, 
aCharLength, + argc); +} + +nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex, + uint32_t aCharLength, + uint8_t argc) { + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState != STATE_SPEAKING)) { + return NS_ERROR_NOT_AVAILABLE; + } + // A zero argc makes the char length null in the event instead of + // aCharLength. + mUtterance->DispatchSpeechSynthesisEvent( + u"boundary"_ns, aCharIndex, + argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr, + aElapsedTime, aName); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex) { + return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex) { + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState != STATE_SPEAKING)) { + return NS_ERROR_NOT_AVAILABLE; + } + mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr, + aElapsedTime, aName); + return NS_OK; +} + +// Pause, Resume and Cancel notify the service callback when one is set. A +// pause or cancel that arrives before Init() is remembered in mPrePaused or +// mPreCanceled. +void nsSpeechTask::Pause() { + MOZ_ASSERT(XRE_IsParentProcess()); + + if (mCallback) { + DebugOnly<nsresult> rv = mCallback->OnPause(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); + } + + if (!mInited) { + mPrePaused = true; + } +} + +void nsSpeechTask::Resume() { + MOZ_ASSERT(XRE_IsParentProcess()); + + if (mCallback) { + DebugOnly<nsresult> rv = mCallback->OnResume(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Unable to call onResume() callback"); + } + + if (mPrePaused) { + mPrePaused = false; + nsSynthVoiceRegistry::GetInstance()->ResumeQueue(); + } +} + +void nsSpeechTask::Cancel() { + MOZ_ASSERT(XRE_IsParentProcess()); + + LOG(LogLevel::Debug, ("nsSpeechTask::Cancel")); + + if (mCallback) { + DebugOnly<nsresult> rv = mCallback->OnCancel(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Unable to call onCancel() callback"); + } + + if (!mInited) { + mPreCanceled = true; + } +} + +void nsSpeechTask::ForceEnd() { + if (!mInited) { + mPreCanceled = 
true; + } + + DispatchEnd(0, 0); +} + +void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) { + mSpeechSynthesis = aSpeechSynthesis; +} + +// Does nothing without an utterance. An existing agent is told playback +// stopped before it is replaced by a new one. +void nsSpeechTask::CreateAudioChannelAgent() { + if (!mUtterance) { + return; + } + + if (mAudioChannelAgent) { + mAudioChannelAgent->NotifyStoppedPlaying(); + } + + mAudioChannelAgent = new AudioChannelAgent(); + mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwner(), this); + + nsresult rv = mAudioChannelAgent->NotifyStartedPlaying( + AudioChannelService::AudibleState::eAudible); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + mAudioChannelAgent->PullInitialUpdate(); +} + +void nsSpeechTask::DestroyAudioChannelAgent() { + if (mAudioChannelAgent) { + mAudioChannelAgent->NotifyStoppedPlaying(); + mAudioChannelAgent = nullptr; + } +} + +// The output volume is the task volume scaled by the window volume, or zero +// when the window is muted. +NS_IMETHODIMP +nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) { + SetAudioOutputVolume(aMuted ? 0.0f : mVolume * aVolume); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) { + if (!mUtterance) { + return NS_OK; + } + + if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) { + Resume(); + } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED && + !mUtterance->mPaused) { + Pause(); + } + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) { + // This is not supported yet. 
+ return NS_OK; +} + +void nsSpeechTask::SetAudioOutputVolume(float aVolume) { + if (mCallback) { + mCallback->OnVolumeChanged(aVolume); + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h new file mode 100644 index 0000000000..fc121cf8f1 --- /dev/null +++ b/dom/media/webspeech/synth/nsSpeechTask.h @@ -0,0 +1,128 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_nsSpeechTask_h +#define mozilla_dom_nsSpeechTask_h + +#include "SpeechSynthesisUtterance.h" +#include "AudioChannelAgent.h" +#include "nsISpeechService.h" +#include "nsWeakReference.h" + +namespace mozilla { + +class SharedBuffer; + +namespace dom { + +class SpeechSynthesisUtterance; +class SpeechSynthesis; + +// Implements nsISpeechTask and nsIAudioChannelAgentCallback. The public +// nsISpeechTask Dispatch* methods delegate to the virtual protected +// Dispatch*Impl hooks declared below. +class nsSpeechTask : public nsISpeechTask, + public nsIAudioChannelAgentCallback, + public nsSupportsWeakReference { + public: + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSpeechTask, nsISpeechTask) + + NS_DECL_NSISPEECHTASK + NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK + + explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting); + nsSpeechTask(float aVolume, const nsAString& aText, + bool aShouldResistFingerprinting); + + virtual void Pause(); + + virtual void Resume(); + + virtual void Cancel(); + + virtual void ForceEnd(); + + void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis); + + void Init(); + + void SetChosenVoiceURI(const nsAString& aUri); + + virtual void SetAudioOutputVolume(float aVolume); + + void ForceError(float aElapsedTime, uint32_t aCharIndex); + + bool IsPreCanceled() const { return mPreCanceled; } + + bool IsPrePaused() const { 
return mPrePaused; } + + bool ShouldResistFingerprinting() const { return mShouldResistFingerprinting; } + + enum { STATE_PENDING, STATE_SPEAKING, STATE_ENDED }; + + uint32_t GetState() const { return mState; } + + bool IsSpeaking() const { return mState == STATE_SPEAKING; } + + bool IsPending() const { return mState == STATE_PENDING; } + + protected: + virtual ~nsSpeechTask(); + + nsresult DispatchStartImpl(); + + virtual nsresult DispatchStartImpl(const nsAString& aUri); + + virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex, + uint32_t aCharLength, uint8_t argc); + + virtual nsresult DispatchMarkImpl(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex); + + RefPtr<SpeechSynthesisUtterance> mUtterance; + + float mVolume; + + nsString mText; + + bool mInited; + + bool mPrePaused; + + bool mPreCanceled; + + private: + void End(); + + void CreateAudioChannelAgent(); + + void DestroyAudioChannelAgent(); + + nsCOMPtr<nsISpeechTaskCallback> mCallback; + + RefPtr<mozilla::dom::AudioChannelAgent> mAudioChannelAgent; + + RefPtr<SpeechSynthesis> mSpeechSynthesis; + + nsString mChosenVoiceURI; + + bool mShouldResistFingerprinting; + + uint32_t mState; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp new file mode 100644 index 0000000000..452c174436 --- /dev/null +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp @@ -0,0 +1,790 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This 
Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISpeechService.h" +#include "nsServiceManagerUtils.h" +#include "nsCategoryManagerUtils.h" + +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" +#include "nsContentUtils.h" +#include "nsSynthVoiceRegistry.h" +#include "nsSpeechTask.h" +#include "AudioChannelService.h" + +#include "nsString.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/ContentChild.h" +#include "mozilla/dom/ContentParent.h" +#include "mozilla/dom/Document.h" +#include "mozilla/intl/LocaleService.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/Unused.h" + +#include "SpeechSynthesisChild.h" +#include "SpeechSynthesisParent.h" + +using mozilla::intl::LocaleService; + +#undef LOG +extern mozilla::LogModule* GetSpeechSynthLog(); +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +namespace { + /* Appends to aActors every SpeechSynthesisParent managed by any ContentParent. Asserts that it runs on the main thread and that aActors is empty on entry. */ +void GetAllSpeechSynthActors( + nsTArray<mozilla::dom::SpeechSynthesisParent*>& aActors) { + MOZ_ASSERT(NS_IsMainThread()); + MOZ_ASSERT(aActors.IsEmpty()); + + AutoTArray<mozilla::dom::ContentParent*, 20> contentActors; + mozilla::dom::ContentParent::GetAll(contentActors); + + for (uint32_t contentIndex = 0; contentIndex < contentActors.Length(); + ++contentIndex) { + MOZ_ASSERT(contentActors[contentIndex]); + + AutoTArray<mozilla::dom::PSpeechSynthesisParent*, 5> speechsynthActors; + contentActors[contentIndex]->ManagedPSpeechSynthesisParent( + speechsynthActors); + + for (uint32_t speechsynthIndex = 0; + speechsynthIndex < speechsynthActors.Length(); ++speechsynthIndex) { + MOZ_ASSERT(speechsynthActors[speechsynthIndex]); + + mozilla::dom::SpeechSynthesisParent* actor = + static_cast<mozilla::dom::SpeechSynthesisParent*>( + speechsynthActors[speechsynthIndex]); + aActors.AppendElement(actor); + } + } 
+} + +} // namespace + +namespace mozilla::dom { + +// VoiceData + /* Refcounted record of one voice: its service, URI, name, language tag, and the local and queued flags passed to the constructor. */ +class VoiceData final { + private: + // Private destructor, to discourage deletion outside of Release(): + ~VoiceData() = default; + + public: + VoiceData(nsISpeechService* aService, const nsAString& aUri, + const nsAString& aName, const nsAString& aLang, bool aIsLocal, + bool aQueuesUtterances) + : mService(aService), + mUri(aUri), + mName(aName), + mLang(aLang), + mIsLocal(aIsLocal), + mIsQueued(aQueuesUtterances) {} + + NS_INLINE_DECL_REFCOUNTING(VoiceData) + + nsCOMPtr<nsISpeechService> mService; + + nsString mUri; + + nsString mName; + + nsString mLang; + + bool mIsLocal; + + bool mIsQueued; +}; + +// GlobalQueueItem + /* Refcounted entry pairing a voice and a task with the text, volume, rate and pitch given to the constructor. mIsLocal is initialised to false by the constructor. */ +class GlobalQueueItem final { + private: + // Private destructor, to discourage deletion outside of Release(): + ~GlobalQueueItem() = default; + + public: + GlobalQueueItem(VoiceData* aVoice, nsSpeechTask* aTask, + const nsAString& aText, const float& aVolume, + const float& aRate, const float& aPitch) + : mVoice(aVoice), + mTask(aTask), + mText(aText), + mVolume(aVolume), + mRate(aRate), + mPitch(aPitch), + mIsLocal(false) {} + + NS_INLINE_DECL_REFCOUNTING(GlobalQueueItem) + + RefPtr<VoiceData> mVoice; + + RefPtr<nsSpeechTask> mTask; + + nsString mText; + + float mVolume; + + float mRate; + + float mPitch; + + bool mIsLocal; +}; + +// nsSynthVoiceRegistry + +static StaticRefPtr<nsSynthVoiceRegistry> gSynthVoiceRegistry; + +NS_IMPL_ISUPPORTS(nsSynthVoiceRegistry, nsISynthVoiceRegistry) + /* In a content process the constructor creates a SpeechSynthesisChild and keeps it in mSpeechSynthChild only if sending the PSpeechSynthesis constructor message succeeds. */ +nsSynthVoiceRegistry::nsSynthVoiceRegistry() + : mSpeechSynthChild(nullptr), mUseGlobalQueue(false), mIsSpeaking(false) { + if (XRE_IsContentProcess()) { + RefPtr<SpeechSynthesisChild> actor = new SpeechSynthesisChild(); + if (ContentChild::GetSingleton()->SendPSpeechSynthesisConstructor(actor)) { + mSpeechSynthChild = actor; + } + } +} + +nsSynthVoiceRegistry::~nsSynthVoiceRegistry() { + LOG(LogLevel::Debug, ("~nsSynthVoiceRegistry")); + + mUriVoiceMap.Clear(); +} + /* Creates the singleton on first use (main thread asserted) and registers it with ClearOnShutdown; in the parent process it then calls NS_CreateServicesFromCategory for NS_SPEECH_SYNTH_STARTED. */ +nsSynthVoiceRegistry* 
nsSynthVoiceRegistry::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + + if (!gSynthVoiceRegistry) { + gSynthVoiceRegistry = new nsSynthVoiceRegistry(); + ClearOnShutdown(&gSynthVoiceRegistry); + if (XRE_IsParentProcess()) { + // Start up all speech synth services. + NS_CreateServicesFromCategory(NS_SPEECH_SYNTH_STARTED, nullptr, + NS_SPEECH_SYNTH_STARTED); + } + } + + return gSynthVoiceRegistry; +} + /* Same instance as GetInstance(), returned as an already_AddRefed reference. */ +already_AddRefed<nsSynthVoiceRegistry> +nsSynthVoiceRegistry::GetInstanceForService() { + RefPtr<nsSynthVoiceRegistry> registry = GetInstance(); + + return registry.forget(); +} + /* Parent process only (asserted): copies mVoices into RemoteVoice entries and the URIs of mDefaultVoices into a string array, then sends both with IsSpeaking() to aParent. */ +bool nsSynthVoiceRegistry::SendInitialVoicesAndState( + SpeechSynthesisParent* aParent) { + MOZ_ASSERT(XRE_IsParentProcess()); + + nsTArray<RemoteVoice> voices; + nsTArray<nsString> defaults; + + for (uint32_t i = 0; i < mVoices.Length(); ++i) { + RefPtr<VoiceData> voice = mVoices[i]; + + voices.AppendElement(RemoteVoice(voice->mUri, voice->mName, voice->mLang, + voice->mIsLocal, voice->mIsQueued)); + } + + for (uint32_t i = 0; i < mDefaultVoices.Length(); ++i) { + defaults.AppendElement(mDefaultVoices[i]->mUri); + } + + return aParent->SendInitialVoicesAndState(voices, defaults, IsSpeaking()); +} + +void nsSynthVoiceRegistry::RecvInitialVoicesAndState( + const nsTArray<RemoteVoice>& aVoices, const nsTArray<nsString>& aDefaults, + const bool& aIsSpeaking) { + // We really should have a local instance since this is a directed response to + // an Init() call. 
+ MOZ_ASSERT(gSynthVoiceRegistry); + + for (uint32_t i = 0; i < aVoices.Length(); ++i) { + RemoteVoice voice = aVoices[i]; + gSynthVoiceRegistry->AddVoiceImpl(nullptr, voice.voiceURI(), voice.name(), + voice.lang(), voice.localService(), + voice.queued()); + } + + for (uint32_t i = 0; i < aDefaults.Length(); ++i) { + gSynthVoiceRegistry->SetDefaultVoice(aDefaults[i], true); + } + + gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking; + + if (aVoices.Length()) { + gSynthVoiceRegistry->NotifyVoicesChanged(); + } +} + +void nsSynthVoiceRegistry::RecvRemoveVoice(const nsAString& aUri) { + // If we dont have a local instance of the registry yet, we will recieve + // current voices at contruction time. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->RemoveVoice(nullptr, aUri); +} + +void nsSynthVoiceRegistry::RecvAddVoice(const RemoteVoice& aVoice) { + // If we dont have a local instance of the registry yet, we will recieve + // current voices at contruction time. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->AddVoiceImpl(nullptr, aVoice.voiceURI(), aVoice.name(), + aVoice.lang(), aVoice.localService(), + aVoice.queued()); +} + +void nsSynthVoiceRegistry::RecvSetDefaultVoice(const nsAString& aUri, + bool aIsDefault) { + // If we dont have a local instance of the registry yet, we will recieve + // current voices at contruction time. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->SetDefaultVoice(aUri, aIsDefault); +} + +void nsSynthVoiceRegistry::RecvIsSpeakingChanged(bool aIsSpeaking) { + // If we dont have a local instance of the registry yet, we will get the + // speaking state on construction. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking; +} + +void nsSynthVoiceRegistry::RecvNotifyVoicesChanged() { + // If we dont have a local instance of the registry yet, we don't care. 
+ if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->NotifyVoicesChanged(); +} + +void nsSynthVoiceRegistry::RecvNotifyVoicesError(const nsAString& aError) { + // If we dont have a local instance of the registry yet, we don't care. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->NotifyVoicesError(aError); +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::AddVoice(nsISpeechService* aService, + const nsAString& aUri, const nsAString& aName, + const nsAString& aLang, bool aLocalService, + bool aQueuesUtterances) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::AddVoice uri='%s' name='%s' lang='%s' local=%s " + "queued=%s", + NS_ConvertUTF16toUTF8(aUri).get(), NS_ConvertUTF16toUTF8(aName).get(), + NS_ConvertUTF16toUTF8(aLang).get(), aLocalService ? "true" : "false", + aQueuesUtterances ? "true" : "false")); + + if (NS_WARN_IF(XRE_IsContentProcess())) { + return NS_ERROR_NOT_AVAILABLE; + } + + return AddVoiceImpl(aService, aUri, aName, aLang, aLocalService, + aQueuesUtterances); +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::RemoveVoice(nsISpeechService* aService, + const nsAString& aUri) { + LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::RemoveVoice uri='%s' (%s)", + NS_ConvertUTF16toUTF8(aUri).get(), + (XRE_IsContentProcess()) ? "child" : "parent")); + + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + if (NS_WARN_IF(!(aService == retval->mService))) { + return NS_ERROR_INVALID_ARG; + } + + mVoices.RemoveElement(retval); + mDefaultVoices.RemoveElement(retval); + mUriVoiceMap.Remove(aUri); + + if (retval->mIsQueued && + !StaticPrefs::media_webspeech_synth_force_global_queue()) { + // Check if this is the last queued voice, and disable the global queue if + // it is. 
+ bool queued = false; + for (uint32_t i = 0; i < mVoices.Length(); i++) { + VoiceData* voice = mVoices[i]; + if (voice->mIsQueued) { + queued = true; + break; + } + } + if (!queued) { + mUseGlobalQueue = false; + } + } + + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) + Unused << ssplist[i]->SendVoiceRemoved(aUri); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::NotifyVoicesChanged() { + if (XRE_IsParentProcess()) { + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) + Unused << ssplist[i]->SendNotifyVoicesChanged(); + } + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (NS_WARN_IF(!(obs))) { + return NS_ERROR_NOT_AVAILABLE; + } + + obs->NotifyObservers(nullptr, "synth-voices-changed", nullptr); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::NotifyVoicesError(const nsAString& aError) { + if (XRE_IsParentProcess()) { + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendNotifyVoicesError(aError); + } + } + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (NS_WARN_IF(!(obs))) { + return NS_ERROR_NOT_AVAILABLE; + } + + obs->NotifyObservers(nullptr, "synth-voices-error", aError.BeginReading()); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::SetDefaultVoice(const nsAString& aUri, bool aIsDefault) { + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + mDefaultVoices.RemoveElement(retval); + + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SetDefaultVoice %s %s", + NS_ConvertUTF16toUTF8(aUri).get(), aIsDefault ? 
"true" : "false")); + + if (aIsDefault) { + mDefaultVoices.AppendElement(retval); + } + + if (XRE_IsParentProcess()) { + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendSetDefaultVoice(aUri, aIsDefault); + } + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceCount(uint32_t* aRetval) { + *aRetval = mVoices.Length(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoice(uint32_t aIndex, nsAString& aRetval) { + if (NS_WARN_IF(!(aIndex < mVoices.Length()))) { + return NS_ERROR_INVALID_ARG; + } + + aRetval = mVoices[aIndex]->mUri; + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::IsDefaultVoice(const nsAString& aUri, bool* aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + for (int32_t i = mDefaultVoices.Length(); i > 0;) { + VoiceData* defaultVoice = mDefaultVoices[--i]; + + if (voice->mLang.Equals(defaultVoice->mLang)) { + *aRetval = voice == defaultVoice; + return NS_OK; + } + } + + *aRetval = false; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::IsLocalVoice(const nsAString& aUri, bool* aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + *aRetval = voice->mIsLocal; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceLang(const nsAString& aUri, nsAString& aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + aRetval = voice->mLang; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceName(const nsAString& aUri, nsAString& aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + 
aRetval = voice->mName; + return NS_OK; +} + +nsresult nsSynthVoiceRegistry::AddVoiceImpl( + nsISpeechService* aService, const nsAString& aUri, const nsAString& aName, + const nsAString& aLang, bool aLocalService, bool aQueuesUtterances) { + const bool found = mUriVoiceMap.Contains(aUri); + if (NS_WARN_IF(found)) { + return NS_ERROR_INVALID_ARG; + } + + RefPtr<VoiceData> voice = new VoiceData(aService, aUri, aName, aLang, + aLocalService, aQueuesUtterances); + + mVoices.AppendElement(voice); + mUriVoiceMap.InsertOrUpdate(aUri, std::move(voice)); + mUseGlobalQueue |= aQueuesUtterances; + + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + if (!ssplist.IsEmpty()) { + mozilla::dom::RemoteVoice ssvoice(nsString(aUri), nsString(aName), + nsString(aLang), aLocalService, + aQueuesUtterances); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendVoiceAdded(ssvoice); + } + } + + return NS_OK; +} + +bool nsSynthVoiceRegistry::FindVoiceByLang(const nsAString& aLang, + VoiceData** aRetval) { + nsAString::const_iterator dashPos, start, end; + aLang.BeginReading(start); + aLang.EndReading(end); + + while (true) { + nsAutoString langPrefix(Substring(start, end)); + + for (int32_t i = mDefaultVoices.Length(); i > 0;) { + VoiceData* voice = mDefaultVoices[--i]; + + if (StringBeginsWith(voice->mLang, langPrefix)) { + *aRetval = voice; + return true; + } + } + + for (int32_t i = mVoices.Length(); i > 0;) { + VoiceData* voice = mVoices[--i]; + + if (StringBeginsWith(voice->mLang, langPrefix)) { + *aRetval = voice; + return true; + } + } + + dashPos = end; + end = start; + + if (!RFindInReadable(u"-"_ns, end, dashPos)) { + break; + } + } + + return false; +} + +VoiceData* nsSynthVoiceRegistry::FindBestMatch(const nsAString& aUri, + const nsAString& aLang) { + if (mVoices.IsEmpty()) { + return nullptr; + } + + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + + if (found) { + LOG(LogLevel::Debug, 
("nsSynthVoiceRegistry::FindBestMatch - Matched URI")); + return retval; + } + + // Try finding a match for given voice. + if (!aLang.IsVoid() && !aLang.IsEmpty()) { + if (FindVoiceByLang(aLang, &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched language (%s ~= %s)", + NS_ConvertUTF16toUTF8(aLang).get(), + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + } + + // Try UI language. + nsAutoCString uiLang; + LocaleService::GetInstance()->GetAppLocaleAsBCP47(uiLang); + + if (FindVoiceByLang(NS_ConvertASCIItoUTF16(uiLang), &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched UI language (%s ~= %s)", + uiLang.get(), NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + + // Try en-US, the language of locale "C" + if (FindVoiceByLang(u"en-US"_ns, &retval)) { + LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::FindBestMatch - Matched C " + "locale language (en-US ~= %s)", + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + + // The top default voice is better than nothing... + if (!mDefaultVoices.IsEmpty()) { + return mDefaultVoices.LastElement(); + } + + return nullptr; +} + +already_AddRefed<nsSpeechTask> nsSynthVoiceRegistry::SpeakUtterance( + SpeechSynthesisUtterance& aUtterance, const nsAString& aDocLang) { + nsString lang = + nsString(aUtterance.mLang.IsEmpty() ? aDocLang : aUtterance.mLang); + nsAutoString uri; + + if (aUtterance.mVoice) { + aUtterance.mVoice->GetVoiceURI(uri); + } + + // Get current audio volume to apply speech call + float volume = aUtterance.Volume(); + RefPtr<AudioChannelService> service = AudioChannelService::GetOrCreate(); + if (service) { + if (nsCOMPtr<nsPIDOMWindowInner> topWindow = aUtterance.GetOwner()) { + // TODO : use audio channel agent, open new bug to fix it. + AudioPlaybackConfig config = + service->GetMediaConfig(topWindow->GetOuterWindow()); + volume = config.mMuted ? 
0.0f : config.mVolume * volume; + } + } + + RefPtr<nsSpeechTask> task; + if (XRE_IsContentProcess()) { + task = new SpeechTaskChild(&aUtterance, + aUtterance.ShouldResistFingerprinting()); + SpeechSynthesisRequestChild* actor = new SpeechSynthesisRequestChild( + static_cast<SpeechTaskChild*>(task.get())); + mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor( + actor, aUtterance.mText, lang, uri, volume, aUtterance.Rate(), + aUtterance.Pitch(), aUtterance.ShouldResistFingerprinting()); + } else { + task = + new nsSpeechTask(&aUtterance, aUtterance.ShouldResistFingerprinting()); + Speak(aUtterance.mText, lang, uri, volume, aUtterance.Rate(), + aUtterance.Pitch(), task); + } + + return task.forget(); +} + +void nsSynthVoiceRegistry::Speak(const nsAString& aText, const nsAString& aLang, + const nsAString& aUri, const float& aVolume, + const float& aRate, const float& aPitch, + nsSpeechTask* aTask) { + MOZ_ASSERT(XRE_IsParentProcess()); + + if (aTask->ShouldResistFingerprinting()) { + aTask->ForceError(0, 0); + return; + } + + VoiceData* voice = FindBestMatch(aUri, aLang); + + if (!voice) { + NS_WARNING("No voices found."); + aTask->ForceError(0, 0); + return; + } + + aTask->SetChosenVoiceURI(voice->mUri); + + if (mUseGlobalQueue || + StaticPrefs::media_webspeech_synth_force_global_queue()) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' " + "rate=%f pitch=%f", + NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(), + NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch)); + RefPtr<GlobalQueueItem> item = + new GlobalQueueItem(voice, aTask, aText, aVolume, aRate, aPitch); + mGlobalQueue.AppendElement(item); + + if (mGlobalQueue.Length() == 1) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } + } else { + SpeakImpl(voice, aTask, aText, aVolume, aRate, aPitch); + } +} + +void nsSynthVoiceRegistry::SpeakNext() { + MOZ_ASSERT(XRE_IsParentProcess()); + + 
LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SpeakNext %d", mGlobalQueue.IsEmpty())); + + SetIsSpeaking(false); + + if (mGlobalQueue.IsEmpty()) { + return; + } + + mGlobalQueue.RemoveElementAt(0); + + while (!mGlobalQueue.IsEmpty()) { + RefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0); + if (item->mTask->IsPreCanceled()) { + mGlobalQueue.RemoveElementAt(0); + continue; + } + if (!item->mTask->IsPrePaused()) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } + break; + } +} + +void nsSynthVoiceRegistry::ResumeQueue() { + MOZ_ASSERT(XRE_IsParentProcess()); + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::ResumeQueue %d", mGlobalQueue.IsEmpty())); + + if (mGlobalQueue.IsEmpty()) { + return; + } + + RefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0); + if (!item->mTask->IsPrePaused()) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } +} + +bool nsSynthVoiceRegistry::IsSpeaking() { return mIsSpeaking; } + +void nsSynthVoiceRegistry::SetIsSpeaking(bool aIsSpeaking) { + MOZ_ASSERT(XRE_IsParentProcess()); + + // Only set to 'true' if global queue is enabled. 
+ mIsSpeaking = + aIsSpeaking && (mUseGlobalQueue || + StaticPrefs::media_webspeech_synth_force_global_queue()); + + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendIsSpeakingChanged(aIsSpeaking); + } +} + +void nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice, nsSpeechTask* aTask, + const nsAString& aText, + const float& aVolume, const float& aRate, + const float& aPitch) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f " + "pitch=%f", + NS_ConvertUTF16toUTF8(aText).get(), + NS_ConvertUTF16toUTF8(aVoice->mUri).get(), aRate, aPitch)); + + aTask->Init(); + + if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate, + aPitch, aTask))) { + aTask->DispatchError(0, 0); + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h new file mode 100644 index 0000000000..d3a5476ae0 --- /dev/null +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h @@ -0,0 +1,101 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_nsSynthVoiceRegistry_h +#define mozilla_dom_nsSynthVoiceRegistry_h + +#include "nsISynthVoiceRegistry.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" + +class nsISpeechService; + +namespace mozilla::dom { + +class RemoteVoice; +class SpeechSynthesisUtterance; +class SpeechSynthesisChild; +class SpeechSynthesisParent; +class nsSpeechTask; +class VoiceData; +class GlobalQueueItem; + +class nsSynthVoiceRegistry final : public nsISynthVoiceRegistry { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISYNTHVOICEREGISTRY + + nsSynthVoiceRegistry(); + + already_AddRefed<nsSpeechTask> SpeakUtterance( + SpeechSynthesisUtterance& aUtterance, const nsAString& aDocLang); + + void Speak(const nsAString& aText, const nsAString& aLang, + const nsAString& aUri, const float& aVolume, const float& aRate, + const float& aPitch, nsSpeechTask* aTask); + + bool SendInitialVoicesAndState(SpeechSynthesisParent* aParent); + + void SpeakNext(); + + void ResumeQueue(); + + bool IsSpeaking(); + + void SetIsSpeaking(bool aIsSpeaking); + + static nsSynthVoiceRegistry* GetInstance(); + + static already_AddRefed<nsSynthVoiceRegistry> GetInstanceForService(); + + static void RecvInitialVoicesAndState(const nsTArray<RemoteVoice>& aVoices, + const nsTArray<nsString>& aDefaults, + const bool& aIsSpeaking); + + static void RecvRemoveVoice(const nsAString& aUri); + + static void RecvAddVoice(const RemoteVoice& aVoice); + + static void RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault); + + static void RecvIsSpeakingChanged(bool aIsSpeaking); + + static void RecvNotifyVoicesChanged(); + + static void RecvNotifyVoicesError(const nsAString& aError); + + private: + virtual ~nsSynthVoiceRegistry(); + + VoiceData* FindBestMatch(const nsAString& aUri, const nsAString& lang); + + bool FindVoiceByLang(const nsAString& aLang, VoiceData** aRetval); + + nsresult AddVoiceImpl(nsISpeechService* aService, const nsAString& aUri, + const nsAString& aName, const nsAString& aLang, + 
bool aLocalService, bool aQueuesUtterances); + + void SpeakImpl(VoiceData* aVoice, nsSpeechTask* aTask, const nsAString& aText, + const float& aVolume, const float& aRate, const float& aPitch); + + nsTArray<RefPtr<VoiceData>> mVoices; + + nsTArray<RefPtr<VoiceData>> mDefaultVoices; + + nsRefPtrHashtable<nsStringHashKey, VoiceData> mUriVoiceMap; + + RefPtr<SpeechSynthesisChild> mSpeechSynthChild; + + bool mUseGlobalQueue; + + nsTArray<RefPtr<GlobalQueueItem>> mGlobalQueue; + + bool mIsSpeaking; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp new file mode 100644 index 0000000000..c0944cf24f --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp @@ -0,0 +1,558 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechDispatcherService.h" + +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_media.h" +#include "nsEscape.h" +#include "nsISupports.h" +#include "nsPrintfCString.h" +#include "nsReadableUtils.h" +#include "nsServiceManagerUtils.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "prlink.h" + +#include <math.h> +#include <stdlib.h> + +#define URI_PREFIX "urn:moz-tts:speechd:" + +#define MAX_RATE static_cast<float>(2.5) +#define MIN_RATE static_cast<float>(0.5) + +// Some structures for libspeechd +typedef enum { + SPD_EVENT_BEGIN, + SPD_EVENT_END, + SPD_EVENT_INDEX_MARK, + SPD_EVENT_CANCEL, + SPD_EVENT_PAUSE, + SPD_EVENT_RESUME +} SPDNotificationType; + +typedef enum { + SPD_BEGIN = 1, + SPD_END = 2, + SPD_INDEX_MARKS = 4, + SPD_CANCEL = 8, + SPD_PAUSE = 16, + SPD_RESUME = 32, + + SPD_ALL = 0x3f +} SPDNotification; + +typedef enum { SPD_MODE_SINGLE = 0, SPD_MODE_THREADED = 1 } SPDConnectionMode; + +typedef void (*SPDCallback)(size_t msg_id, size_t client_id, + SPDNotificationType state); + +typedef void (*SPDCallbackIM)(size_t msg_id, size_t client_id, + SPDNotificationType state, char* index_mark); + +struct SPDConnection { + SPDCallback callback_begin; + SPDCallback callback_end; + SPDCallback callback_cancel; + SPDCallback callback_pause; + SPDCallback callback_resume; + SPDCallbackIM callback_im; + + /* partial, more private fields in structure */ +}; + +struct SPDVoice { + char* name; + char* language; + char* variant; +}; + +typedef enum { + SPD_IMPORTANT = 1, + SPD_MESSAGE = 2, + SPD_TEXT = 3, + SPD_NOTIFICATION = 4, + SPD_PROGRESS = 5 +} SPDPriority; + +#define SPEECHD_FUNCTIONS \ + FUNC(spd_open, SPDConnection*, \ + (const char*, const char*, const char*, SPDConnectionMode)) \ + FUNC(spd_close, void, (SPDConnection*)) \ + FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \ + 
FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \ + FUNC(spd_cancel, int, (SPDConnection*)) \ + FUNC(spd_set_volume, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \ + FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \ + FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification)) + +#define FUNC(name, type, params) \ + typedef type(*_##name##_fn) params; \ + static _##name##_fn _##name; + +SPEECHD_FUNCTIONS + +#undef FUNC + +#define spd_open _spd_open +#define spd_close _spd_close +#define spd_list_synthesis_voices _spd_list_synthesis_voices +#define spd_say _spd_say +#define spd_cancel _spd_cancel +#define spd_set_volume _spd_set_volume +#define spd_set_voice_rate _spd_set_voice_rate +#define spd_set_voice_pitch _spd_set_voice_pitch +#define spd_set_synthesis_voice _spd_set_synthesis_voice +#define spd_set_notification_on _spd_set_notification_on + +static PRLibrary* speechdLib = nullptr; + +typedef void (*nsSpeechDispatcherFunc)(); +struct nsSpeechDispatcherDynamicFunction { + const char* functionName; + nsSpeechDispatcherFunc* function; +}; + +namespace mozilla::dom { + +StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton; + +class SpeechDispatcherVoice { + public: + SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage) + : mName(aName), mLanguage(aLanguage) {} + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice) + + // Voice name + nsString mName; + + // Voice language, in BCP-47 syntax + nsString mLanguage; + + private: + ~SpeechDispatcherVoice() = default; +}; + +class SpeechDispatcherCallback final : public nsISpeechTaskCallback { + public: + SpeechDispatcherCallback(nsISpeechTask* aTask, + SpeechDispatcherService* aService) + : mTask(aTask), mService(aService) {} + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, 
+ nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + bool OnSpeechEvent(SPDNotificationType state); + + private: + ~SpeechDispatcherCallback() = default; + + // This pointer is used to dispatch events + nsCOMPtr<nsISpeechTask> mTask; + + // By holding a strong reference to the service we guarantee that it won't be + // destroyed before this runnable. + RefPtr<SpeechDispatcherService> mService; + + TimeStamp mStartTime; +}; + +NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback) + +NS_IMETHODIMP +SpeechDispatcherCallback::OnPause() { + // XXX: Speech dispatcher does not pause immediately, but waits for the speech + // to reach an index mark so that it could resume from that offset. + // There is no support for word or sentence boundaries, so index marks would + // only occur in explicit SSML marks, and we don't support that yet. + // What in actuality happens, is that if you call spd_pause(), it will speak + // the utterance in its entirety, dispatch an end event, and then put speechd + // in a 'paused' state. Since it is after the utterance ended, we don't get + // that state change, and our speech api is in an unrecoverable state. + // So, since it is useless anyway, I am not implementing pause. + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnResume() { + // XXX: Unsupported, see OnPause(). 
+ return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnCancel() { + if (spd_cancel(mService->mSpeechdClient) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnVolumeChanged(float aVolume) { + // XXX: This currently does not change the volume mid-utterance, but it + // doesn't do anything bad either. So we could put this here with the hopes + // that speechd supports this in the future. + if (spd_set_volume(mService->mSpeechdClient, + static_cast<int>(aVolume * 100)) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +bool SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) { + bool remove = false; + + switch (state) { + case SPD_EVENT_BEGIN: + mStartTime = TimeStamp::Now(); + mTask->DispatchStart(); + break; + + case SPD_EVENT_PAUSE: + mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_RESUME: + mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_CANCEL: + case SPD_EVENT_END: + mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + remove = true; + break; + + case SPD_EVENT_INDEX_MARK: + // Not yet supported + break; + + default: + break; + } + + return remove; +} + +static void speechd_cb(size_t msg_id, size_t client_id, + SPDNotificationType state) { + SpeechDispatcherService* service = + SpeechDispatcherService::GetInstance(false); + + if (service) { + NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>( + "dom::SpeechDispatcherService::EventNotify", service, + &SpeechDispatcherService::EventNotify, static_cast<uint32_t>(msg_id), + state)); + } +} + +NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SpeechDispatcherService) +NS_IMPL_RELEASE(SpeechDispatcherService) + 
+SpeechDispatcherService::SpeechDispatcherService() + : mInitialized(false), mSpeechdClient(nullptr) {} + +void SpeechDispatcherService::Init() { + if (!StaticPrefs::media_webspeech_synth_enabled() || + Preferences::GetBool("media.webspeech.synth.test")) { + return; + } + + // While speech dispatcher has a "threaded" mode, only spd_say() is async. + // Since synchronous socket i/o could impact startup time, we do + // initialization in a separate thread. + DebugOnly<nsresult> rv = + NS_NewNamedThread("speechd init", getter_AddRefs(mInitThread)); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + rv = mInitThread->Dispatch( + NewRunnableMethod("dom::SpeechDispatcherService::Setup", this, + &SpeechDispatcherService::Setup), + NS_DISPATCH_NORMAL); + MOZ_ASSERT(NS_SUCCEEDED(rv)); +} + +SpeechDispatcherService::~SpeechDispatcherService() { + if (mInitThread) { + mInitThread->Shutdown(); + } + + if (mSpeechdClient) { + spd_close(mSpeechdClient); + } +} + +void SpeechDispatcherService::Setup() { +#define FUNC(name, type, params) {#name, (nsSpeechDispatcherFunc*)&_##name}, + static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = { + SPEECHD_FUNCTIONS}; +#undef FUNC + + MOZ_ASSERT(!mInitialized); + + speechdLib = PR_LoadLibrary("libspeechd.so.2"); + + if (!speechdLib) { + NS_WARNING("Failed to load speechd library"); + NotifyError(u"lib-missing"_ns); + return; + } + + if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) { + // There is no version getter function, so we rely on a symbol that was + // introduced in release 0.8.2 in order to check for ABI compatibility. 
+ NS_WARNING("Unsupported version of speechd detected"); + NotifyError(u"lib-too-old"_ns); + return; + } + + for (uint32_t i = 0; i < ArrayLength(kSpeechDispatcherSymbols); i++) { + *kSpeechDispatcherSymbols[i].function = PR_FindFunctionSymbol( + speechdLib, kSpeechDispatcherSymbols[i].functionName); + + if (!*kSpeechDispatcherSymbols[i].function) { + NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'", + kSpeechDispatcherSymbols[i].functionName) + .get()); + NotifyError(u"missing-symbol"_ns); + return; + } + } + + mSpeechdClient = + spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED); + if (!mSpeechdClient) { + NS_WARNING("Failed to call spd_open"); + NotifyError(u"open-fail"_ns); + return; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient); + + mSpeechdClient->callback_begin = speechd_cb; + mSpeechdClient->callback_end = speechd_cb; + mSpeechdClient->callback_cancel = speechd_cb; + mSpeechdClient->callback_pause = speechd_cb; + mSpeechdClient->callback_resume = speechd_cb; + + spd_set_notification_on(mSpeechdClient, SPD_BEGIN); + spd_set_notification_on(mSpeechdClient, SPD_END); + spd_set_notification_on(mSpeechdClient, SPD_CANCEL); + + if (list != NULL) { + for (int i = 0; list[i]; i++) { + nsAutoString uri; + + uri.AssignLiteral(URI_PREFIX); + nsAutoCString name; + NS_EscapeURL(list[i]->name, -1, + esc_OnlyNonASCII | esc_Spaces | esc_AlwaysCopy, name); + uri.Append(NS_ConvertUTF8toUTF16(name)); + + uri.AppendLiteral("?"); + + nsAutoCString lang(list[i]->language); + + uri.Append(NS_ConvertUTF8toUTF16(lang)); + + mVoices.InsertOrUpdate(uri, MakeRefPtr<SpeechDispatcherVoice>( + NS_ConvertUTF8toUTF16(list[i]->name), + NS_ConvertUTF8toUTF16(lang))); + } + } + + if (mVoices.Count() == 0) { + NotifyError(u"no-voices"_ns); + } + + NS_DispatchToMainThread( + NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices", this, + 
&SpeechDispatcherService::RegisterVoices)); + + // mInitialized = true; +} + +// private methods + +void SpeechDispatcherService::NotifyError(const nsString& aError) { + if (!NS_IsMainThread()) { + NS_DispatchToMainThread(NewRunnableMethod<const nsString>( + "dom::SpeechDispatcherService::NotifyError", this, + &SpeechDispatcherService::NotifyError, aError)); + return; + } + + RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance(); + DebugOnly<nsresult> rv = registry->NotifyVoicesError(aError); +} + +void SpeechDispatcherService::RegisterVoices() { + RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance(); + for (const auto& entry : mVoices) { + const RefPtr<SpeechDispatcherVoice>& voice = entry.GetData(); + + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. + DebugOnly<nsresult> rv = + registry->AddVoice(this, entry.GetKey(), voice->mName, voice->mLanguage, + voice->mName.EqualsLiteral("default"), true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); + } + + mInitThread->Shutdown(); + mInitThread = nullptr; + + mInitialized = true; + + registry->NotifyVoicesChanged(); +} + +// nsIObserver + +NS_IMETHODIMP +SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + return NS_OK; +} + +// nsISpeechService + +// TODO: Support SSML +NS_IMETHODIMP +SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, + float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) { + if (NS_WARN_IF(!mInitialized)) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<SpeechDispatcherCallback> callback = + new SpeechDispatcherCallback(aTask, this); + + bool found = false; + SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found); + + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + 
spd_set_synthesis_voice(mSpeechdClient, + NS_ConvertUTF16toUTF8(voice->mName).get()); + + // We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100. + spd_set_volume(mSpeechdClient, static_cast<int>(aVolume * 100)); + + // aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate. + // speechd expects -100 to 100 with 0 being normal rate. + float rate = 0; + if (aRate > 1) { + // Each step to 100 is logarithmically distributed up to 2.5x. + rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100; + } else if (aRate < 1) { + // Each step to -100 is logarithmically distributed down to 0.5x. + rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100; + } + + spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate)); + + // We provide a pitch of 0 to 2 with 1 being the default. + // speech-dispatcher expects -100 to 100 with 0 being default. + spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100)); + + nsresult rv = aTask->Setup(callback); + + if (NS_FAILED(rv)) { + return rv; + } + + if (aText.Length()) { + int msg_id = spd_say(mSpeechdClient, SPD_MESSAGE, + NS_ConvertUTF16toUTF8(aText).get()); + + if (msg_id < 0) { + return NS_ERROR_FAILURE; + } + + mCallbacks.InsertOrUpdate(msg_id, std::move(callback)); + } else { + // Speech dispatcher does not work well with empty strings. + // In that case, don't send empty string to speechd, + // and just emulate a speechd start and end event. 
+ NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>( + "dom::SpeechDispatcherCallback::OnSpeechEvent", callback, + &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN)); + + NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>( + "dom::SpeechDispatcherCallback::OnSpeechEvent", callback, + &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END)); + } + + return NS_OK; +} + +SpeechDispatcherService* SpeechDispatcherService::GetInstance(bool create) { + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT( + false, + "SpeechDispatcherService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton && create) { + sSingleton = new SpeechDispatcherService(); + sSingleton->Init(); + ClearOnShutdown(&sSingleton); + } + + return sSingleton; +} + +already_AddRefed<SpeechDispatcherService> +SpeechDispatcherService::GetInstanceForService() { + MOZ_ASSERT(NS_IsMainThread()); + RefPtr<SpeechDispatcherService> sapiService = GetInstance(); + return sapiService.forget(); +} + +void SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) { + SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId); + + if (callback) { + if (callback->OnSpeechEvent((SPDNotificationType)aState)) { + mCallbacks.Remove(aMsgId); + } + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h new file mode 100644 index 0000000000..ac67f64d0f --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h @@ -0,0 +1,67 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechDispatcherService_h +#define mozilla_dom_SpeechDispatcherService_h + +#include "mozilla/StaticPtr.h" +#include "nsIObserver.h" +#include "nsISpeechService.h" +#include "nsIThread.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" + +struct SPDConnection; + +namespace mozilla { +namespace dom { + +class SpeechDispatcherCallback; +class SpeechDispatcherVoice; + +class SpeechDispatcherService final : public nsIObserver, + public nsISpeechService { + friend class SpeechDispatcherCallback; + + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIOBSERVER + NS_DECL_NSISPEECHSERVICE + + SpeechDispatcherService(); + + void Init(); + + void Setup(); + + void EventNotify(uint32_t aMsgId, uint32_t aState); + + static SpeechDispatcherService* GetInstance(bool create = true); + static already_AddRefed<SpeechDispatcherService> GetInstanceForService(); + + static StaticRefPtr<SpeechDispatcherService> sSingleton; + + private: + virtual ~SpeechDispatcherService(); + + void NotifyError(const nsString& aError); + + void RegisterVoices(); + + bool mInitialized; + + SPDConnection* mSpeechdClient; + + nsRefPtrHashtable<nsUint32HashKey, SpeechDispatcherCallback> mCallbacks; + + nsCOMPtr<nsIThread> mInitThread; + + nsRefPtrHashtable<nsStringHashKey, SpeechDispatcherVoice> mVoices; +}; + +} // namespace dom +} // namespace mozilla +#endif diff --git a/dom/media/webspeech/synth/speechd/components.conf b/dom/media/webspeech/synth/speechd/components.conf new file mode 100644 index 0000000000..56b01ba5cb --- /dev/null +++ b/dom/media/webspeech/synth/speechd/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +Classes = [ + { + 'cid': '{8817b1cf-5ada-43bf-bd73-607657703d0d}', + 'contract_ids': ['@mozilla.org/synthspeechdispatcher;1'], + 'singleton': True, + 'type': 'mozilla::dom::SpeechDispatcherService', + 'headers': ['/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h'], + 'constructor': 'mozilla::dom::SpeechDispatcherService::GetInstanceForService', + 'categories': {"speech-synth-started": 'SpeechDispatcher Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/speechd/moz.build b/dom/media/webspeech/synth/speechd/moz.build new file mode 100644 index 0000000000..0d9632a488 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/moz.build @@ -0,0 +1,15 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += ["SpeechDispatcherService.cpp"] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" diff --git a/dom/media/webspeech/synth/test/common.js b/dom/media/webspeech/synth/test/common.js new file mode 100644 index 0000000000..c22b0b488c --- /dev/null +++ b/dom/media/webspeech/synth/test/common.js @@ -0,0 +1,104 @@ +function synthTestQueue(aTestArgs, aEndFunc) { + var utterances = []; + for (var i in aTestArgs) { + var uargs = aTestArgs[i][0]; + var win = uargs.win || window; + var u = new win.SpeechSynthesisUtterance(uargs.text); + + if (uargs.args) { + for (var attr in uargs.args) { + u[attr] = uargs.args[attr]; + } + } + + function onend_handler(e) { + is(e.target, utterances.shift(), "Target matches utterances"); + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking."); + + if (utterances.length) { + ok(speechSynthesis.pending, "other utterances queued"); + } else { + ok(!speechSynthesis.pending, "queue is 
empty, nothing pending."); + if (aEndFunc) { + aEndFunc(); + } + } + } + + u.addEventListener( + "start", + (function (expectedUri) { + return function (e) { + if (expectedUri) { + var chosenVoice = SpecialPowers.wrap(e).target.chosenVoiceURI; + is(chosenVoice, expectedUri, "Incorrect URI is used"); + } + }; + })(aTestArgs[i][1] ? aTestArgs[i][1].uri : null) + ); + + u.addEventListener("end", onend_handler); + u.addEventListener("error", onend_handler); + + u.addEventListener( + "error", + (function (expectedError) { + return function onerror_handler(e) { + ok( + expectedError, + "Error in speech utterance '" + e.target.text + "'" + ); + }; + })(aTestArgs[i][1] ? aTestArgs[i][1].err : false) + ); + + utterances.push(u); + win.speechSynthesis.speak(u); + } + + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet."); + ok(speechSynthesis.pending, "speechSynthesis has an utterance queued."); +} + +function loadFrame(frameId) { + return new Promise(function (resolve, reject) { + var frame = document.getElementById(frameId); + frame.addEventListener("load", function (e) { + frame.contentWindow.document.title = frameId; + resolve(frame); + }); + frame.src = "about:blank"; + }); +} + +function waitForVoices(win) { + return new Promise(resolve => { + function resolver() { + if (win.speechSynthesis.getVoices().length) { + win.speechSynthesis.removeEventListener("voiceschanged", resolver); + resolve(); + } + } + + win.speechSynthesis.addEventListener("voiceschanged", resolver); + resolver(); + }); +} + +function loadSpeechTest(fileName, prefs, frameId = "testFrame") { + loadFrame(frameId).then(frame => { + waitForVoices(frame.contentWindow).then( + () => (document.getElementById("testFrame").src = fileName) + ); + }); +} + +function testSynthState(win, expectedState) { + for (var attr in expectedState) { + is( + win.speechSynthesis[attr], + expectedState[attr], + win.document.title + ": '" + attr + '" does not match' + ); + } +} diff --git 
a/dom/media/webspeech/synth/test/components.conf b/dom/media/webspeech/synth/test/components.conf new file mode 100644 index 0000000000..f37e4eafae --- /dev/null +++ b/dom/media/webspeech/synth/test/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{e7d52d9e-c148-47d8-ab2a-95d7f40ea53d}', + 'contract_ids': ['@mozilla.org/fakesynth;1'], + 'singleton': True, + 'type': 'mozilla::dom::nsFakeSynthServices', + 'headers': ['/dom/media/webspeech/synth/test/nsFakeSynthServices.h'], + 'constructor': 'mozilla::dom::nsFakeSynthServices::GetInstanceForService', + 'categories': {'speech-synth-started': 'Fake Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/test/file_bfcache_page1.html b/dom/media/webspeech/synth/test/file_bfcache_page1.html new file mode 100644 index 0000000000..d6229eeeda --- /dev/null +++ b/dom/media/webspeech/synth/test/file_bfcache_page1.html @@ -0,0 +1,18 @@ +<!DOCTYPE HTML> +<html> +<head> + <meta charset="utf-8"> + <script type="application/javascript"> + addEventListener('pageshow', function onshow(evt) { + var u = new SpeechSynthesisUtterance('hello'); + u.lang = 'it-IT-noend'; + u.addEventListener('start', function() { + location = "file_bfcache_page2.html"; + }); + speechSynthesis.speak(u); + }); + </script> +</head> +<body> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_bfcache_page2.html b/dom/media/webspeech/synth/test/file_bfcache_page2.html new file mode 100644 index 0000000000..30b9aa9117 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_bfcache_page2.html @@ -0,0 +1,14 @@ +<html> +<script> +var frameUnloaded = function() { + var u = new SpeechSynthesisUtterance('hi'); + u.addEventListener('end', function 
() { + opener.ok(true, 'Successfully spoke utterance from new frame.'); + opener.onDone(); + }); + speechSynthesis.speak(u); +}; +</script> + +<body onpageshow="frameUnloaded()"></body></html> + diff --git a/dom/media/webspeech/synth/test/file_global_queue.html b/dom/media/webspeech/synth/test/file_global_queue.html new file mode 100644 index 0000000000..5d762c0d51 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue.html @@ -0,0 +1,69 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Global queue should correctly schedule utterances</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + window.todo = parent.todo; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<iframe id="frame1"></iframe> +<iframe id="frame2"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + Promise.all([loadFrame('frame1'), loadFrame('frame2')]).then(function ([frame1, frame2]) { + var win1 = frame1.contentWindow; + var win2 = frame2.contentWindow; + var utterance1 = new win1.SpeechSynthesisUtterance("hello, losers"); + var utterance2 = new win1.SpeechSynthesisUtterance("hello, losers three"); + var utterance3 = new win2.SpeechSynthesisUtterance("hello, losers too"); + var eventOrder = ['start1', 'end1', 'start3', 'end3', 'start2', 'end2']; + utterance1.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start1', 'start1'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: true }); + }); + utterance1.addEventListener('end', function(e) { + 
is(eventOrder.shift(), 'end1', 'end1'); + }); + utterance3.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start3', 'start3'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: false }); + }); + utterance3.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end3', 'end3'); + }); + utterance2.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start2', 'start2'); + testSynthState(win1, { speaking: true, pending: false }); + testSynthState(win2, { speaking: true, pending: false }); + }); + utterance2.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end2', 'end2'); + testSynthState(win1, { speaking: false, pending: false }); + testSynthState(win2, { speaking: false, pending: false }); + SimpleTest.finish(); + }); + win1.speechSynthesis.speak(utterance1); + win1.speechSynthesis.speak(utterance2); + win2.speechSynthesis.speak(utterance3); + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_global_queue_cancel.html b/dom/media/webspeech/synth/test/file_global_queue_cancel.html new file mode 100644 index 0000000000..03b77ba2fc --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue_cancel.html @@ -0,0 +1,88 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling cancel() should work correctly with global queue</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + window.todo = parent.todo; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<iframe id="frame1"></iframe> +<iframe id="frame2"></iframe> +<div 
id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + Promise.all([loadFrame('frame1'), loadFrame('frame2')]).then(function ([frame1, frame2]) { + var win1 = frame1.contentWindow; + var win2 = frame2.contentWindow; + + var utterance1 = new win1.SpeechSynthesisUtterance( + "u1: Donec ac nunc feugiat, posuere"); + utterance1.lang = 'it-IT-noend'; + var utterance2 = new win1.SpeechSynthesisUtterance("u2: hello, losers too"); + utterance2.lang = 'it-IT-noend'; + var utterance3 = new win1.SpeechSynthesisUtterance("u3: hello, losers three"); + + var utterance4 = new win2.SpeechSynthesisUtterance("u4: hello, losers same!"); + utterance4.lang = 'it-IT-noend'; + var utterance5 = new win2.SpeechSynthesisUtterance("u5: hello, losers too"); + utterance5.lang = 'it-IT-noend'; + + var eventOrder = ['start1', 'end1', 'start2', 'end2']; + utterance1.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start1', 'start1'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: true }); + win2.speechSynthesis.cancel(); + SpecialPowers.wrap(win1.speechSynthesis).forceEnd(); + + }); + utterance1.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end1', 'end1'); + testSynthState(win1, { pending: true }); + testSynthState(win2, { pending: false }); + }); + utterance2.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start2', 'start2'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: false }); + win1.speechSynthesis.cancel(); + }); + utterance2.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end2', 'end2'); + testSynthState(win1, { speaking: false, pending: false }); + testSynthState(win2, { speaking: false, pending: false }); + SimpleTest.finish(); + }); + + function wrongUtterance(e) { + ok(false, 'This shall not be uttered: "' + e.target.text + '"'); + } + + 
utterance3.addEventListener('start', wrongUtterance); + utterance4.addEventListener('start', wrongUtterance); + utterance5.addEventListener('start', wrongUtterance); + + win1.speechSynthesis.speak(utterance1); + win1.speechSynthesis.speak(utterance2); + win1.speechSynthesis.speak(utterance3); + win2.speechSynthesis.speak(utterance4); + win2.speechSynthesis.speak(utterance5); + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_global_queue_pause.html b/dom/media/webspeech/synth/test/file_global_queue_pause.html new file mode 100644 index 0000000000..e345eb4c98 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue_pause.html @@ -0,0 +1,130 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling pause() should work correctly with global queue</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + window.todo = parent.todo; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<iframe id="frame1"></iframe> +<iframe id="frame2"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + Promise.all([loadFrame('frame1'), loadFrame('frame2')]).then(function ([frame1, frame2]) { + var win1 = frame1.contentWindow; + var win2 = frame2.contentWindow; + + var utterance1 = new win1.SpeechSynthesisUtterance("Speak utterance 1."); + utterance1.lang = 'it-IT-noend'; + var utterance2 = new win2.SpeechSynthesisUtterance("Speak utterance 2."); + var utterance3 = new win1.SpeechSynthesisUtterance("Speak utterance 3."); + var utterance4 = new 
win2.SpeechSynthesisUtterance("Speak utterance 4."); + var eventOrder = ['start1', 'pause1', 'resume1', 'end1', 'start2', 'end2', + 'start4', 'end4', 'start3', 'end3']; + + utterance1.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start1', 'start1'); + win1.speechSynthesis.pause(); + }); + utterance1.addEventListener('pause', function(e) { + var expectedEvent = eventOrder.shift() + is(expectedEvent, 'pause1', 'pause1'); + testSynthState(win1, { speaking: true, pending: false, paused: true}); + testSynthState(win2, { speaking: true, pending: true, paused: false}); + + if (expectedEvent == 'pause1') { + win1.speechSynthesis.resume(); + } + }); + utterance1.addEventListener('resume', function(e) { + is(eventOrder.shift(), 'resume1', 'resume1'); + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: true, paused: false}); + + win2.speechSynthesis.pause(); + + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: true, paused: true }); + + // We now make the utterance end. 
+ SpecialPowers.wrap(win1.speechSynthesis).forceEnd(); + }); + utterance1.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end1', 'end1'); + testSynthState(win1, { speaking: false, pending: false, paused: false}); + testSynthState(win2, { speaking: false, pending: true, paused: true}); + + win2.speechSynthesis.resume(); + }); + + utterance2.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start2', 'start2'); + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: false, paused: false}); + }); + utterance2.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end2', 'end2'); + testSynthState(win1, { speaking: false, pending: false, paused: false}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + + win1.speechSynthesis.pause(); + + testSynthState(win1, { speaking: false, pending: false, paused: true}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + + win1.speechSynthesis.speak(utterance3); + win2.speechSynthesis.speak(utterance4); + + testSynthState(win1, { speaking: false, pending: true, paused: true}); + testSynthState(win2, { speaking: false, pending: true, paused: false}); + }); + + utterance4.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start4', 'start4'); + testSynthState(win1, { speaking: true, pending: true, paused: true}); + testSynthState(win2, { speaking: true, pending: false, paused: false}); + + win1.speechSynthesis.resume(); + }); + utterance4.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end4', 'end4'); + testSynthState(win1, { speaking: false, pending: true, paused: false}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + }); + + utterance3.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start3', 'start3'); + testSynthState(win1, { speaking: true, pending: false, paused: false}); + 
testSynthState(win2, { speaking: true, pending: false, paused: false}); + }); + + utterance3.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end3', 'end3'); + testSynthState(win1, { speaking: false, pending: false, paused: false}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + + SimpleTest.finish(); + }); + + win1.speechSynthesis.speak(utterance1); + win2.speechSynthesis.speak(utterance2); + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_indirect_service_events.html b/dom/media/webspeech/synth/test/file_indirect_service_events.html new file mode 100644 index 0000000000..5ed7812757 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_indirect_service_events.html @@ -0,0 +1,102 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1155034 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1155034: Check that indirect audio services dispatch their own events</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1155034">Mozilla Bug 1155034</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1155034 **/ + +function testFunc(done_cb) { + function test_with_events() { + info('test_with_events'); + var utterance = new SpeechSynthesisUtterance("never end, callback events"); + utterance.lang = 'it-IT-noend'; + + utterance.addEventListener('start', function(e) { + info('start test_with_events'); + speechSynthesis.pause(); + // Wait to see if we get some bad events we didn't expect. 
+ }); + + utterance.addEventListener('pause', function(e) { + is(e.charIndex, 1, 'pause event charIndex matches service arguments'); + is(e.elapsedTime, 1.5, 'pause event elapsedTime matches service arguments'); + speechSynthesis.resume(); + }); + + utterance.addEventListener('resume', function(e) { + is(e.charIndex, 1, 'resume event charIndex matches service arguments'); + is(e.elapsedTime, 1.5, 'resume event elapsedTime matches service arguments'); + speechSynthesis.cancel(); + }); + + utterance.addEventListener('end', function(e) { + is(e.charIndex, 1, 'resume event charIndex matches service arguments'); + is(e.elapsedTime, 1.5, 'end event elapsedTime matches service arguments'); + test_no_events(); + }); + + info('start speak'); + speechSynthesis.speak(utterance); + } + + function forbiddenEvent(e) { + ok(false, 'no "' + e.type + '" event was explicitly dispatched from the service') + } + + function test_no_events() { + info('test_no_events'); + var utterance = new SpeechSynthesisUtterance("never end"); + utterance.lang = "it-IT-noevents-noend"; + utterance.addEventListener('start', function(e) { + speechSynthesis.pause(); + // Wait to see if we get some bad events we didn't expect. + setTimeout(function() { + ok(true, 'didn\'t get any unwanted events'); + utterance.removeEventListener('end', forbiddenEvent); + SpecialPowers.wrap(speechSynthesis).forceEnd(); + done_cb(); + }, 1000); + }); + + utterance.addEventListener('pause', forbiddenEvent); + utterance.addEventListener('end', forbiddenEvent); + + speechSynthesis.speak(utterance); + } + + test_with_events(); +} + +// Run test with no global queue, and then run it with a global queue. 
+testFunc(function() { + SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.force_global_queue', true]] }, function() { + testFunc(SimpleTest.finish) + }); +}); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_setup.html b/dom/media/webspeech/synth/test/file_setup.html new file mode 100644 index 0000000000..da8c2c6824 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_setup.html @@ -0,0 +1,96 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API check all classes are present</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +ok(SpeechSynthesis, "SpeechSynthesis exists in global scope"); +ok(SpeechSynthesisVoice, "SpeechSynthesisVoice exists in global scope"); +ok(SpeechSynthesisErrorEvent, "SpeechSynthesisErrorEvent exists in global scope"); +ok(SpeechSynthesisEvent, "SpeechSynthesisEvent exists in global scope"); + +// SpeechSynthesisUtterance is the only type that has a constructor +// and writable properties +ok(SpeechSynthesisUtterance, "SpeechSynthesisUtterance exists in global scope"); +var ssu = new SpeechSynthesisUtterance("hello world"); +is(typeof ssu, "object", "SpeechSynthesisUtterance instance is an object"); +is(ssu.text, "hello world", "SpeechSynthesisUtterance.text is correct"); +is(ssu.volume, 1, "SpeechSynthesisUtterance.volume default is correct"); +is(ssu.rate, 1, "SpeechSynthesisUtterance.rate default is 
correct"); +is(ssu.pitch, 1, "SpeechSynthesisUtterance.pitch default is correct"); +ssu.lang = "he-IL"; +ssu.volume = 0.5; +ssu.rate = 2.0; +ssu.pitch = 1.5; +is(ssu.lang, "he-IL", "SpeechSynthesisUtterance.lang is correct"); +is(ssu.volume, 0.5, "SpeechSynthesisUtterance.volume is correct"); +is(ssu.rate, 2.0, "SpeechSynthesisUtterance.rate is correct"); +is(ssu.pitch, 1.5, "SpeechSynthesisUtterance.pitch is correct"); + +// Assign a rate that is out of bounds +ssu.rate = 20; +is(ssu.rate, 10, "SpeechSynthesisUtterance.rate enforces max of 10"); +ssu.rate = 0; +is(ssu.rate.toPrecision(1), "0.1", "SpeechSynthesisUtterance.rate enforces min of 0.1"); + +// Assign a volume which is out of bounds +ssu.volume = 2; +is(ssu.volume, 1, "SpeechSynthesisUtterance.volume enforces max of 1"); +ssu.volume = -1; +is(ssu.volume, 0, "SpeechSynthesisUtterance.volume enforces min of 0"); + +// Assign a pitch which is out of bounds +ssu.pitch = 2.1; +is(ssu.pitch, 2, "SpeechSynthesisUtterance.pitch enforces max of 2"); +ssu.pitch = -1; +is(ssu.pitch, 0, "SpeechSynthesisUtterance.pitch enforces min of 0"); + +// Test for singleton instance hanging off of window. 
+ok(speechSynthesis, "speechSynthesis exists in global scope"); +is(typeof speechSynthesis, "object", "speechSynthesis instance is an object"); +is(typeof speechSynthesis.speak, "function", "speechSynthesis.speak is a function"); +is(typeof speechSynthesis.cancel, "function", "speechSynthesis.cancel is a function"); +is(typeof speechSynthesis.pause, "function", "speechSynthesis.pause is a function"); +is(typeof speechSynthesis.resume, "function", "speechSynthesis.resume is a function"); +is(typeof speechSynthesis.getVoices, "function", "speechSynthesis.getVoices is a function"); + +is(typeof speechSynthesis.pending, "boolean", "speechSynthesis.pending is a boolean"); +is(typeof speechSynthesis.speaking, "boolean", "speechSynthesis.speaking is a boolean"); +is(typeof speechSynthesis.paused, "boolean", "speechSynthesis.paused is a boolean"); + +var voices1 = speechSynthesis.getVoices(); +var voices2 = speechSynthesis.getVoices(); + +ok(!!voices1.length, "More than one voice found"); +ok(voices1.length == voices2.length, "Voice count matches"); + +for (var i in voices1) { + ok(voices1[i] == voices2[i], "Voice instance matches"); +} + +SimpleTest.finish(); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_cancel.html b/dom/media/webspeech/synth/test/file_speech_cancel.html new file mode 100644 index 0000000000..2ab0e1d0a8 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_cancel.html @@ -0,0 +1,100 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1150315 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1150315: Check that successive cancel/speak calls work</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=1150315">Mozilla Bug 1150315</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1150315 **/ + +function testFunc(done_cb) { + var gotEndEvent = false; + // A long utterance that we will interrupt. + var utterance = new SpeechSynthesisUtterance("Donec ac nunc feugiat, posuere " + + "mauris id, pharetra velit. Donec fermentum orci nunc, sit amet maximus" + + "dui tincidunt ut. Sed ultricies ac nisi a laoreet. Proin interdum," + + "libero maximus hendrerit posuere, lorem risus egestas nisl, a" + + "ultricies massa justo eu nisi. Duis mattis nibh a ligula tincidunt" + + "tincidunt non eu erat. Sed bibendum varius vulputate. Cras leo magna," + + "ornare ac posuere vel, luctus id metus. Mauris nec quam ac augue" + + "consectetur bibendum. Integer a commodo tortor. Duis semper dolor eu" + + "facilisis facilisis. Etiam venenatis turpis est, quis tincidunt velit" + + "suscipit a. Cras semper orci in sapien rhoncus bibendum. Suspendisse" + + "eu ex lobortis, finibus enim in, condimentum quam. Maecenas eget dui" + + "ipsum. Aliquam tortor leo, interdum eget congue ut, tempor id elit."); + utterance.addEventListener('start', function(e) { + ok(true, 'start utterance 1'); + speechSynthesis.cancel(); + info('cancel!'); + speechSynthesis.speak(utterance2); + info('speak??'); + }); + + var utterance2 = new SpeechSynthesisUtterance("Proin ornare neque vitae " + + "risus mattis rutrum. Suspendisse a velit ut est convallis aliquet." + + "Nullam ante elit, malesuada vel luctus rutrum, ultricies nec libero." + + "Praesent eu iaculis orci. Sed nisl diam, sodales ac purus et," + + "volutpat interdum tortor. Nullam aliquam porta elit et maximus. Cras" + + "risus lectus, elementum vel sodales vel, ultricies eget lectus." + + "Curabitur velit lacus, mollis vel finibus et, molestie sit amet" + + "sapien. 
Proin vitae dolor ac augue posuere efficitur ac scelerisque" + + "diam. Nulla sed odio elit."); + utterance2.addEventListener('start', function() { + info('start'); + speechSynthesis.cancel(); + speechSynthesis.speak(utterance3); + }); + utterance2.addEventListener('end', function(e) { + gotEndEvent = true; + }); + + var utterance3 = new SpeechSynthesisUtterance("Hello, world 3!"); + utterance3.addEventListener('start', function() { + ok(gotEndEvent, "didn't get start event for this utterance"); + }); + utterance3.addEventListener('end', done_cb); + + // Speak/cancel while paused (Bug 1187105) + speechSynthesis.pause(); + speechSynthesis.speak(new SpeechSynthesisUtterance("hello.")); + ok(speechSynthesis.pending, "paused speechSynthesis has an utterance queued."); + speechSynthesis.cancel(); + ok(!speechSynthesis.pending, "paused speechSynthesis has no utterance queued."); + speechSynthesis.resume(); + + speechSynthesis.speak(utterance); + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet."); + ok(speechSynthesis.pending, "speechSynthesis has an utterance queued."); +} + +// Run test with no global queue, and then run it with a global queue. 
+testFunc(function() { + SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.force_global_queue', true]] }, function() { + testFunc(SimpleTest.finish) + }); +}); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_error.html b/dom/media/webspeech/synth/test/file_speech_error.html new file mode 100644 index 0000000000..b98ec2fac0 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_error.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1226015 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1226015</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1226015">Mozilla Bug 1226015</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1226015 **/ + +function testFunc(done_cb) { + var utterance = new SpeechSynthesisUtterance(); + utterance.lang = 'it-IT-failatstart'; + + speechSynthesis.speak(utterance); + speechSynthesis.cancel(); + + ok(true, "we didn't crash, that is good.") + SimpleTest.finish(); +} + +// Run the test once; testFunc() calls SimpleTest.finish() itself. 
+testFunc(); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_queue.html b/dom/media/webspeech/synth/test/file_speech_queue.html new file mode 100644 index 0000000000..a471034dcf --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_queue.html @@ -0,0 +1,86 @@ +<!DOCTYPE HTML> +<html lang="en-US"> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API, check speech synth queue</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=525444">Mozilla Bug 525444</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +// XXX: Rate and pitch are not tested. 
+ +var langUriMap = {}; + +for (let voice of speechSynthesis.getVoices()) { + langUriMap[voice.lang] = voice.voiceURI; + ok(true, voice.lang + ' ' + voice.voiceURI + ' ' + voice.default); + is(voice.default, voice.lang == 'en-JM', 'Only Jamaican voice should be default'); +} + +ok(langUriMap['en-JM'], 'No English-Jamaican voice'); +ok(langUriMap['en-GB'], 'No English-British voice'); +ok(langUriMap['en-CA'], 'No English-Canadian voice'); +ok(langUriMap['fr-CA'], 'No French-Canadian voice'); +ok(langUriMap['es-MX'], 'No Spanish-Mexican voice'); +ok(langUriMap['it-IT-fail'], 'No Failing Italian voice'); + +function testFunc(done_cb) { + synthTestQueue( + [[{text: "Hello, world."}, + { uri: langUriMap['en-JM'] }], + [{text: "Bonjour tout le monde .", + args: { lang: "fr", rate: 0.5, pitch: 0.75 }}, + { uri: langUriMap['fr-CA'], rate: 0.5, pitch: 0.75}], + [{text: "How are you doing?", args: { lang: "en-GB" } }, + { rate: 1, pitch: 1, uri: langUriMap['en-GB']}], + [{text: "Come stai?", args: { lang: "it-IT-fail" } }, + { rate: 1, pitch: 1, uri: langUriMap['it-IT-fail'], err: true }], + [{text: "¡hasta mañana!", args: { lang: "es-MX" } }, + { uri: langUriMap['es-MX'] }]], + function () { + var test_data = []; + var voices = speechSynthesis.getVoices(); + for (let voice of voices) { + if (voice.lang.split("-").length > 2) { + // Skip voices that don't automatically end with success + continue; + } + test_data.push([{text: "Hello world", args: { voice} }, + {uri: voice.voiceURI}]); + } + + synthTestQueue(test_data, done_cb); + }); +} + +// Run test with no global queue, and then run it with a global queue. 
+testFunc(function() { + SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.force_global_queue', true]] }, function() { + testFunc(SimpleTest.finish) + }); +}); + + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_repeating_utterance.html b/dom/media/webspeech/synth/test/file_speech_repeating_utterance.html new file mode 100644 index 0000000000..6e37653057 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_repeating_utterance.html @@ -0,0 +1,26 @@ +<!DOCTYPE HTML> +<html> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1305344: Utterance not repeating in Firefox</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.ok = parent.ok; + </script> + <script src="common.js"></script> +</head> +<body> + <script> + var utterance = new SpeechSynthesisUtterance("repeating?"); + var counter = 0; + utterance.addEventListener('start', function(e) { + if (counter++ === 1) { + ok(true) + SimpleTest.finish(); + } + }); + speechSynthesis.speak(utterance); + speechSynthesis.speak(utterance); + </script> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_simple.html b/dom/media/webspeech/synth/test/file_speech_simple.html new file mode 100644 index 0000000000..c3f240ccdc --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_simple.html @@ -0,0 +1,53 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295: Web Speech API check all classes are present</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> 
+<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +var gotStartEvent = false; +var gotBoundaryEvent = false; +var utterance = new SpeechSynthesisUtterance("Hello, world!"); +utterance.addEventListener('start', function(e) { + ok(speechSynthesis.speaking, "speechSynthesis is speaking."); + ok(!speechSynthesis.pending, "speechSynthesis has no other utterances queued."); + gotStartEvent = true; +}); + +utterance.addEventListener('end', function(e) { + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking."); + ok(!speechSynthesis.pending, "speechSynthesis has no other utterances queued."); + ok(gotStartEvent, "Got 'start' event."); + info('end ' + e.elapsedTime); + SimpleTest.finish(); +}); + +speechSynthesis.speak(utterance); +ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet."); +ok(speechSynthesis.pending, "speechSynthesis has an utterance queued."); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/mochitest.toml b/dom/media/webspeech/synth/test/mochitest.toml new file mode 100644 index 0000000000..5180e2ea01 --- /dev/null +++ b/dom/media/webspeech/synth/test/mochitest.toml @@ -0,0 +1,40 @@ +[DEFAULT] +tags = "mtg" +subsuite = "media" +support-files = [ + "common.js", + "file_bfcache_page1.html", + "file_bfcache_page2.html", + "file_setup.html", + "file_speech_queue.html", + "file_speech_simple.html", + "file_speech_cancel.html", + "file_speech_error.html", + "file_indirect_service_events.html", + "file_global_queue.html", + "file_global_queue_cancel.html", + "file_global_queue_pause.html", + "file_speech_repeating_utterance.html", +] + +["test_bfcache.html"] + +["test_global_queue.html"] + +["test_global_queue_cancel.html"] + +["test_global_queue_pause.html"] + +["test_indirect_service_events.html"] + +["test_setup.html"] + +["test_speech_cancel.html"] + +["test_speech_error.html"] + 
+["test_speech_queue.html"] + +["test_speech_repeating_utterance.html"] + +["test_speech_simple.html"] diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp new file mode 100644 index 0000000000..075e8aa878 --- /dev/null +++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp @@ -0,0 +1,288 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "nsFakeSynthServices.h" +#include "nsPrintfCString.h" +#include "SharedBuffer.h" + +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" + +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "prenv.h" +#include "mozilla/Preferences.h" +#include "mozilla/DebugOnly.h" + +#define CHANNELS 1 +#define SAMPLERATE 1600 + +namespace mozilla::dom { + +StaticRefPtr<nsFakeSynthServices> nsFakeSynthServices::sSingleton; + +enum VoiceFlags { + eSuppressEvents = 1, + eSuppressEnd = 2, + eFailAtStart = 4, + eFail = 8 +}; + +struct VoiceDetails { + const char* uri; + const char* name; + const char* lang; + bool defaultVoice; + uint32_t flags; +}; + +static const VoiceDetails sVoices[] = { + {"urn:moz-tts:fake:bob", "Bob Marley", "en-JM", true, 0}, + {"urn:moz-tts:fake:amy", "Amy Winehouse", "en-GB", false, 0}, + {"urn:moz-tts:fake:lenny", "Leonard Cohen", "en-CA", false, 0}, + {"urn:moz-tts:fake:celine", "Celine Dion", "fr-CA", false, 0}, + { + "urn:moz-tts:fake:julie", + "Julieta Venegas", + "es-MX", + false, + }, + {"urn:moz-tts:fake:zanetta", "Zanetta Farussi", "it-IT", false, 0}, + {"urn:moz-tts:fake:margherita", "Margherita Durastanti", + "it-IT-noevents-noend", false, 
eSuppressEvents | eSuppressEnd}, + {"urn:moz-tts:fake:teresa", "Teresa Cornelys", "it-IT-noend", false, + eSuppressEnd}, + {"urn:moz-tts:fake:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, + eFailAtStart}, + {"urn:moz-tts:fake:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, + eFail}, +}; + +// FakeSynthCallback +class FakeSynthCallback : public nsISpeechTaskCallback { + public: + explicit FakeSynthCallback(nsISpeechTask* aTask) : mTask(aTask) {} + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(FakeSynthCallback, + nsISpeechTaskCallback) + + NS_IMETHOD OnPause() override { + if (mTask) { + mTask->DispatchPause(1.5, 1); + } + + return NS_OK; + } + + NS_IMETHOD OnResume() override { + if (mTask) { + mTask->DispatchResume(1.5, 1); + } + + return NS_OK; + } + + NS_IMETHOD OnCancel() override { + if (mTask) { + mTask->DispatchEnd(1.5, 1); + } + + return NS_OK; + } + + NS_IMETHOD OnVolumeChanged(float aVolume) override { return NS_OK; } + + private: + virtual ~FakeSynthCallback() = default; + + nsCOMPtr<nsISpeechTask> mTask; +}; + +NS_IMPL_CYCLE_COLLECTION(FakeSynthCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(FakeSynthCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(FakeSynthCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(FakeSynthCallback) + +// FakeSpeechSynth + +class FakeSpeechSynth : public nsISpeechService { + public: + FakeSpeechSynth() = default; + + NS_DECL_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + + private: + virtual ~FakeSpeechSynth() = default; +}; + +NS_IMPL_ISUPPORTS(FakeSpeechSynth, nsISpeechService) + +NS_IMETHODIMP +FakeSpeechSynth::Speak(const nsAString& aText, const nsAString& aUri, + float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) { + class DispatchStart final : public Runnable { + public: + explicit DispatchStart(nsISpeechTask* aTask) + : 
mozilla::Runnable("DispatchStart"), mTask(aTask) {} + + NS_IMETHOD Run() override { + mTask->DispatchStart(); + + return NS_OK; + } + + private: + nsCOMPtr<nsISpeechTask> mTask; + }; + + class DispatchEnd final : public Runnable { + public: + DispatchEnd(nsISpeechTask* aTask, const nsAString& aText) + : mozilla::Runnable("DispatchEnd"), mTask(aTask), mText(aText) {} + + NS_IMETHOD Run() override { + mTask->DispatchEnd(mText.Length() / 2, mText.Length()); + + return NS_OK; + } + + private: + nsCOMPtr<nsISpeechTask> mTask; + nsString mText; + }; + + class DispatchError final : public Runnable { + public: + DispatchError(nsISpeechTask* aTask, const nsAString& aText) + : mozilla::Runnable("DispatchError"), mTask(aTask), mText(aText) {} + + NS_IMETHOD Run() override { + mTask->DispatchError(mText.Length() / 2, mText.Length()); + + return NS_OK; + } + + private: + nsCOMPtr<nsISpeechTask> mTask; + nsString mText; + }; + + uint32_t flags = 0; + for (VoiceDetails voice : sVoices) { + if (aUri.EqualsASCII(voice.uri)) { + flags = voice.flags; + break; + } + } + + if (flags & eFailAtStart) { + return NS_ERROR_FAILURE; + } + + RefPtr<FakeSynthCallback> cb = + new FakeSynthCallback((flags & eSuppressEvents) ? 
nullptr : aTask); + + aTask->Setup(cb); + + nsCOMPtr<nsIRunnable> runnable = new DispatchStart(aTask); + NS_DispatchToMainThread(runnable); + + if (flags & eFail) { + runnable = new DispatchError(aTask, aText); + NS_DispatchToMainThread(runnable); + } else if ((flags & eSuppressEnd) == 0) { + runnable = new DispatchEnd(aTask, aText); + NS_DispatchToMainThread(runnable); + } + + return NS_OK; +} + +// nsFakeSynthService + +NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(nsFakeSynthServices) +NS_IMPL_RELEASE(nsFakeSynthServices) + +static void AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, + uint32_t aLength) { + RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance(); + for (uint32_t i = 0; i < aLength; i++) { + NS_ConvertUTF8toUTF16 name(aVoices[i].name); + NS_ConvertUTF8toUTF16 uri(aVoices[i].uri); + NS_ConvertUTF8toUTF16 lang(aVoices[i].lang); + // These services can handle more than one utterance at a time and have + // several speaking simultaneously. 
So, aQueuesUtterances == false + registry->AddVoice(aService, uri, name, lang, true, false); + if (aVoices[i].defaultVoice) { + registry->SetDefaultVoice(uri, true); + } + } + + registry->NotifyVoicesChanged(); +} + +void nsFakeSynthServices::Init() { + mSynthService = new FakeSpeechSynth(); + AddVoices(mSynthService, sVoices, ArrayLength(sVoices)); +} + +// nsIObserver + +NS_IMETHODIMP +nsFakeSynthServices::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + MOZ_ASSERT(NS_IsMainThread()); + if (NS_WARN_IF(!(!strcmp(aTopic, "speech-synth-started")))) { + return NS_ERROR_UNEXPECTED; + } + + if (Preferences::GetBool("media.webspeech.synth.test")) { + NS_DispatchToMainThread(NewRunnableMethod( + "dom::nsFakeSynthServices::Init", this, &nsFakeSynthServices::Init)); + } + + return NS_OK; +} + +// static methods + +nsFakeSynthServices* nsFakeSynthServices::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + if (!XRE_IsParentProcess()) { + MOZ_ASSERT(false, + "nsFakeSynthServices can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton) { + sSingleton = new nsFakeSynthServices(); + ClearOnShutdown(&sSingleton); + } + + return sSingleton; +} + +already_AddRefed<nsFakeSynthServices> +nsFakeSynthServices::GetInstanceForService() { + RefPtr<nsFakeSynthServices> picoService = GetInstance(); + return picoService.forget(); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.h b/dom/media/webspeech/synth/test/nsFakeSynthServices.h new file mode 100644 index 0000000000..f7e1ca7da6 --- /dev/null +++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsFakeSynthServices_h +#define nsFakeSynthServices_h + +#include "nsTArray.h" +#include "nsIObserver.h" +#include "nsISpeechService.h" +#include "nsRefPtrHashtable.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/Monitor.h" + +namespace mozilla::dom { + +class nsFakeSynthServices : public nsIObserver { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIOBSERVER + + nsFakeSynthServices() = default; + + static nsFakeSynthServices* GetInstance(); + + static already_AddRefed<nsFakeSynthServices> GetInstanceForService(); + + private: + virtual ~nsFakeSynthServices() = default; + + void Init(); + + nsCOMPtr<nsISpeechService> mSynthService; + + static StaticRefPtr<nsFakeSynthServices> sSingleton; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/test/startup/file_voiceschanged.html b/dom/media/webspeech/synth/test/startup/file_voiceschanged.html new file mode 100644 index 0000000000..6bb25462e4 --- /dev/null +++ b/dom/media/webspeech/synth/test/startup/file_voiceschanged.html @@ -0,0 +1,32 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1254378 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1254378: Web Speech API check all classes are present</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> +</head> +<body> +<script type="application/javascript"> + +/** Test for Bug 1254378 **/ + +function onVoicesChanged() { + isnot(speechSynthesis.getVoices().length, 0, "Voices added"); + speechSynthesis.removeEventListener("voiceschanged", onVoicesChanged); + SimpleTest.finish(); +} + +speechSynthesis.addEventListener("voiceschanged", onVoicesChanged); + +is(speechSynthesis.getVoices().length, 0, "No voices added initially"); +</script> +</body> +</html> 
diff --git a/dom/media/webspeech/synth/test/startup/mochitest.toml b/dom/media/webspeech/synth/test/startup/mochitest.toml new file mode 100644 index 0000000000..ab8b057b2a --- /dev/null +++ b/dom/media/webspeech/synth/test/startup/mochitest.toml @@ -0,0 +1,8 @@ +[DEFAULT] +tags = "mtg" +subsuite = "media" +support-files = ["file_voiceschanged.html"] + +["test_voiceschanged.html"] +skip-if = ["verify"] + diff --git a/dom/media/webspeech/synth/test/startup/test_voiceschanged.html b/dom/media/webspeech/synth/test/startup/test_voiceschanged.html new file mode 100644 index 0000000000..a60252ea7e --- /dev/null +++ b/dom/media/webspeech/synth/test/startup/test_voiceschanged.html @@ -0,0 +1,32 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1254378 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1254378: Emit onvoiceschanged when voices first added</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1254378">Mozilla Bug 1254378</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1254378 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv({ set: [['media.webspeech.synth.enabled', true]] }, + function() { document.getElementById("testFrame").src = "file_voiceschanged.html"; }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_bfcache.html b/dom/media/webspeech/synth/test/test_bfcache.html new file mode 100644 index 0000000000..ba5981a42b --- /dev/null +++ b/dom/media/webspeech/synth/test/test_bfcache.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1230533 +--> +<head> + <meta charset="utf-8"> + <title>Test for 
Bug 1230533: Test speech is stopped from a window when unloaded</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1230533">Mozilla Bug 1230533</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1230533 **/ + +SimpleTest.waitForExplicitFinish(); +let testWin; + +function onDone() { + testWin.close(); + SimpleTest.finish(); +} + +SpecialPowers.pushPrefEnv({ set: [ + ['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { + testWin = window.open("about:blank", "testWin"); + testWin.onload = function(e) { + waitForVoices(testWin) + .then(() => testWin.location = "file_bfcache_page1.html") + }; + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_global_queue.html b/dom/media/webspeech/synth/test/test_global_queue.html new file mode 100644 index 0000000000..177f79b399 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Global queue should correctly schedule utterances</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script 
type="application/javascript"> + +/** Test for Bug 1188099 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { loadSpeechTest("file_global_queue.html"); }); + +</script> +</pre> +</body> +</html>
\ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_global_queue_cancel.html b/dom/media/webspeech/synth/test/test_global_queue_cancel.html new file mode 100644 index 0000000000..748d1367b5 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue_cancel.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling cancel() should work correctly with global queue</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1188099 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { loadSpeechTest("file_global_queue_cancel.html"); }); + +</script> +</pre> +</body> +</html>
\ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_global_queue_pause.html b/dom/media/webspeech/synth/test/test_global_queue_pause.html new file mode 100644 index 0000000000..9632d85127 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue_pause.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling pause() should work correctly with global queue</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1188099 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { loadSpeechTest("file_global_queue_pause.html"); }); + +</script> +</pre> +</body> +</html>
\ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_indirect_service_events.html b/dom/media/webspeech/synth/test/test_indirect_service_events.html new file mode 100644 index 0000000000..e5b32e70f0 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_indirect_service_events.html @@ -0,0 +1,36 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1155034 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1155034: Check that indirect audio services dispatch their own events</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1155034">Mozilla Bug 1155034</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1155034 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { loadSpeechTest("file_indirect_service_events.html"); }); + + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_setup.html b/dom/media/webspeech/synth/test/test_setup.html new file mode 100644 index 0000000000..da07687750 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_setup.html @@ -0,0 +1,32 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv({ set: [['media.webspeech.synth.enabled', true]] }, + function() { document.getElementById("testFrame").src = "file_setup.html"; }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_cancel.html b/dom/media/webspeech/synth/test/test_speech_cancel.html new file mode 100644 index 0000000000..ced952c736 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_cancel.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1150315 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1150315: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1150315">Mozilla Bug 1150315</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1150315 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { loadSpeechTest("file_speech_cancel.html"); }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_error.html b/dom/media/webspeech/synth/test/test_speech_error.html new file mode 100644 index 0000000000..e2ce156dc6 --- /dev/null +++ 
b/dom/media/webspeech/synth/test/test_speech_error.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1226015 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1150315: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1226015">Mozilla Bug 1226015</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1226015 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { loadSpeechTest("file_speech_error.html"); }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_queue.html b/dom/media/webspeech/synth/test/test_speech_queue.html new file mode 100644 index 0000000000..3bca9e0ce2 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_queue.html @@ -0,0 +1,37 @@ +<!DOCTYPE HTML> +<html lang="en-US"> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API, check speech synth queue</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=525444">Mozilla Bug 525444</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> 
+<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { + loadSpeechTest("file_speech_queue.html"); + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_repeating_utterance.html b/dom/media/webspeech/synth/test/test_speech_repeating_utterance.html new file mode 100644 index 0000000000..6313a275c1 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_repeating_utterance.html @@ -0,0 +1,18 @@ +<!DOCTYPE HTML> +<html> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1305344: Utterance not repeating in Firefox</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script src="common.js"></script> + <link rel="stylesheet" href="/tests/SimpleTest/test.css"/> +</head> +<body> + <a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1305344">Mozilla Bug 1305344</a> + <iframe id="testFrame"></iframe> + <script> + SimpleTest.waitForExplicitFinish(); + loadSpeechTest('file_speech_repeating_utterance.html'); + </script> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_simple.html b/dom/media/webspeech/synth/test/test_speech_simple.html new file mode 100644 index 0000000000..c6c0e3a5be --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_simple.html @@ -0,0 +1,34 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true]] }, + function() { loadSpeechTest("file_speech_simple.html"); }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/windows/SapiService.cpp b/dom/media/webspeech/synth/windows/SapiService.cpp new file mode 100644 index 0000000000..f1e44213d1 --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.cpp @@ -0,0 +1,445 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.h" +#include "SapiService.h" +#include "nsServiceManagerUtils.h" +#include "nsEscape.h" +#include "nsXULAppAPI.h" + +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/Preferences.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/StaticPrefs_media.h" + +namespace mozilla::dom { + +constexpr static WCHAR kSpCategoryOneCoreVoices[] = + L"HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices"; + +StaticRefPtr<SapiService> SapiService::sSingleton; + +class SapiCallback final : public nsISpeechTaskCallback { + public: + SapiCallback(nsISpeechTask* aTask, ISpVoice* aSapiClient, + uint32_t aTextOffset, uint32_t aSpeakTextLen) + : mTask(aTask), + mSapiClient(aSapiClient), + mTextOffset(aTextOffset), + mSpeakTextLen(aSpeakTextLen), + mCurrentIndex(0), + mStreamNum(0) { + mStartingTime = TimeStamp::Now(); + } + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SapiCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + ULONG GetStreamNum() const { return mStreamNum; } + void SetStreamNum(ULONG aValue) { mStreamNum = aValue; } + + void OnSpeechEvent(const SPEVENT& speechEvent); + + private: + ~SapiCallback() {} + + float GetTimeDurationFromStart() const { + TimeDuration duration = TimeStamp::Now() - mStartingTime; + return duration.ToSeconds(); + } + + // This pointer is used to dispatch events + nsCOMPtr<nsISpeechTask> mTask; + RefPtr<ISpVoice> mSapiClient; + + uint32_t mTextOffset; + uint32_t mSpeakTextLen; + + // Used for calculating the time taken to speak the utterance + TimeStamp mStartingTime; + uint32_t mCurrentIndex; + + ULONG mStreamNum; +}; + +NS_IMPL_CYCLE_COLLECTION(SapiCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SapiCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + 
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SapiCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SapiCallback) + +NS_IMETHODIMP +SapiCallback::OnPause() { + if (FAILED(mSapiClient->Pause())) { + return NS_ERROR_FAILURE; + } + if (!mTask) { + // When calling pause() on child process, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnResume() { + if (FAILED(mSapiClient->Resume())) { + return NS_ERROR_FAILURE; + } + if (!mTask) { + // When calling resume() on child process, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnCancel() { + // After cancel, mCurrentIndex may be updated. + // At cancel case, use mCurrentIndex for DispatchEnd. + mSpeakTextLen = 0; + // Purge all the previous utterances and speak an empty string + if (FAILED(mSapiClient->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr))) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnVolumeChanged(float aVolume) { + mSapiClient->SetVolume(static_cast<USHORT>(aVolume * 100)); + return NS_OK; +} + +void SapiCallback::OnSpeechEvent(const SPEVENT& speechEvent) { + switch (speechEvent.eEventId) { + case SPEI_START_INPUT_STREAM: + mTask->DispatchStart(); + break; + case SPEI_END_INPUT_STREAM: + if (mSpeakTextLen) { + mCurrentIndex = mSpeakTextLen; + } + mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex); + mTask = nullptr; + break; + case SPEI_TTS_BOOKMARK: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(u"mark"_ns, GetTimeDurationFromStart(), + mCurrentIndex, 0, 0); + break; + case SPEI_WORD_BOUNDARY: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(u"word"_ns, 
GetTimeDurationFromStart(), + mCurrentIndex, + static_cast<ULONG>(speechEvent.wParam), 1); + break; + case SPEI_SENTENCE_BOUNDARY: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(u"sentence"_ns, GetTimeDurationFromStart(), + mCurrentIndex, + static_cast<ULONG>(speechEvent.wParam), 1); + break; + default: + break; + } +} + +// static +void __stdcall SapiService::SpeechEventCallback(WPARAM aWParam, + LPARAM aLParam) { + RefPtr<ISpVoice> spVoice = (ISpVoice*)aWParam; + RefPtr<SapiService> service = (SapiService*)aLParam; + + SPEVENT speechEvent; + while (spVoice->GetEvents(1, &speechEvent, nullptr) == S_OK) { + for (size_t i = 0; i < service->mCallbacks.Length(); i++) { + RefPtr<SapiCallback> callback = service->mCallbacks[i]; + if (callback->GetStreamNum() == speechEvent.ulStreamNum) { + callback->OnSpeechEvent(speechEvent); + if (speechEvent.eEventId == SPEI_END_INPUT_STREAM) { + service->mCallbacks.RemoveElementAt(i); + } + break; + } + } + } +} + +NS_INTERFACE_MAP_BEGIN(SapiService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SapiService) +NS_IMPL_RELEASE(SapiService) + +SapiService::SapiService() : mInitialized(false) {} + +SapiService::~SapiService() {} + +bool SapiService::Init() { + AUTO_PROFILER_LABEL("SapiService::Init", OTHER); + + MOZ_ASSERT(!mInitialized); + + if (Preferences::GetBool("media.webspeech.synth.test") || + !StaticPrefs::media_webspeech_synth_enabled()) { + // When enabled, we shouldn't add OS backend (Bug 1160844) + return false; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + if (!RegisterVoices()) { + return false; + } + + mInitialized = true; + return true; +} + +already_AddRefed<ISpVoice> SapiService::InitSapiInstance() { + RefPtr<ISpVoice> spVoice; + if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, 
IID_ISpVoice, + getter_AddRefs(spVoice)))) { + return nullptr; + } + + // Set interest for all the events we are interested in + ULONGLONG eventMask = SPFEI(SPEI_START_INPUT_STREAM) | + SPFEI(SPEI_TTS_BOOKMARK) | SPFEI(SPEI_WORD_BOUNDARY) | + SPFEI(SPEI_SENTENCE_BOUNDARY) | + SPFEI(SPEI_END_INPUT_STREAM); + + if (FAILED(spVoice->SetInterest(eventMask, eventMask))) { + return nullptr; + } + + // Set the callback function for receiving the events + spVoice->SetNotifyCallbackFunction( + (SPNOTIFYCALLBACK*)SapiService::SpeechEventCallback, + (WPARAM)spVoice.get(), (LPARAM)this); + + return spVoice.forget(); +} + +bool SapiService::RegisterVoices() { + nsCOMPtr<nsISynthVoiceRegistry> registry = + do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID); + if (!registry) { + return false; + } + bool result = RegisterVoices(registry, kSpCategoryOneCoreVoices); + result |= RegisterVoices(registry, SPCAT_VOICES); + if (result) { + registry->NotifyVoicesChanged(); + } + return result; +} + +bool SapiService::RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry, + const WCHAR* categoryId) { + nsresult rv; + + RefPtr<ISpObjectTokenCategory> category; + if (FAILED(CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, + IID_ISpObjectTokenCategory, + getter_AddRefs(category)))) { + return false; + } + if (FAILED(category->SetId(categoryId, FALSE))) { + return false; + } + + RefPtr<IEnumSpObjectTokens> voiceTokens; + if (FAILED(category->EnumTokens(nullptr, nullptr, + getter_AddRefs(voiceTokens)))) { + return false; + } + + WCHAR locale[LOCALE_NAME_MAX_LENGTH]; + while (true) { + RefPtr<ISpObjectToken> voiceToken; + if (voiceTokens->Next(1, getter_AddRefs(voiceToken), nullptr) != S_OK) { + break; + } + + RefPtr<ISpDataKey> attributes; + if (FAILED( + voiceToken->OpenKey(L"Attributes", getter_AddRefs(attributes)))) { + continue; + } + + WCHAR* language = nullptr; + if (FAILED(attributes->GetStringValue(L"Language", &language))) { + continue; + } + + // Language attribute is 
LCID by hex. So we need to convert it to a locale + // name. + nsAutoString hexLcid; + LCID lcid = wcstol(language, nullptr, 16); + CoTaskMemFree(language); + if (NS_WARN_IF( + !LCIDToLocaleName(lcid, locale, LOCALE_NAME_MAX_LENGTH, 0))) { + continue; + } + + WCHAR* description = nullptr; + if (FAILED(voiceToken->GetStringValue(nullptr, &description))) { + continue; + } + + nsAutoString uri; + uri.AssignLiteral("urn:moz-tts:sapi:"); + uri.Append(description); + uri.AppendLiteral("?"); + uri.Append(locale); + + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. + rv = registry->AddVoice(this, uri, nsDependentString(description), + nsDependentString(locale), true, true); + CoTaskMemFree(description); + if (NS_FAILED(rv)) { + continue; + } + + mVoices.InsertOrUpdate(uri, std::move(voiceToken)); + } + + return true; +} + +NS_IMETHODIMP +SapiService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume, + float aRate, float aPitch, nsISpeechTask* aTask) { + NS_ENSURE_TRUE(mInitialized, NS_ERROR_NOT_AVAILABLE); + + RefPtr<ISpObjectToken> voiceToken; + if (!mVoices.Get(aUri, getter_AddRefs(voiceToken))) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<ISpVoice> spVoice = InitSapiInstance(); + if (!spVoice) { + return NS_ERROR_FAILURE; + } + + if (FAILED(spVoice->SetVoice(voiceToken))) { + return NS_ERROR_FAILURE; + } + + if (FAILED(spVoice->SetVolume(static_cast<USHORT>(aVolume * 100)))) { + return NS_ERROR_FAILURE; + } + + // The max supported rate in SAPI engines is 3x, and the min is 1/3x. It is + // expressed by an integer. 0 being normal rate, -10 is 1/3 and 10 is 3x. + // Values below and above that are allowed, but the engine may clip the rate + // to its maximum capable value. 
+ // "Each increment between -10 and +10 is logarithmically distributed such + // that incrementing or decrementing by 1 is multiplying or dividing the + // rate by the 10th root of 3" + // https://msdn.microsoft.com/en-us/library/ee431826(v=vs.85).aspx + long rate = aRate != 0 ? static_cast<long>(10 * log10(aRate) / log10(3)) : 0; + if (FAILED(spVoice->SetRate(rate))) { + return NS_ERROR_FAILURE; + } + + // Set the pitch using xml + nsAutoString xml; + xml.AssignLiteral("<pitch absmiddle=\""); + // absmiddle doesn't allow float type + xml.AppendInt(static_cast<int32_t>(aPitch * 10.0f - 10.0f)); + xml.AppendLiteral("\">"); + uint32_t textOffset = xml.Length(); + + for (size_t i = 0; i < aText.Length(); i++) { + switch (aText[i]) { + case '&': + xml.AppendLiteral("&amp;"); + break; + case '<': + xml.AppendLiteral("&lt;"); + break; + case '>': + xml.AppendLiteral("&gt;"); + break; + default: + xml.Append(aText[i]); + break; + } + } + + xml.AppendLiteral("</pitch>"); + + RefPtr<SapiCallback> callback = + new SapiCallback(aTask, spVoice, textOffset, aText.Length()); + + // The last three parameters don't matter for an indirect service + nsresult rv = aTask->Setup(callback); + if (NS_FAILED(rv)) { + return rv; + } + + ULONG streamNum; + if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) { + aTask->Setup(nullptr); + return NS_ERROR_FAILURE; + } + + callback->SetStreamNum(streamNum); + // streamNum reassigns same value when last stream is finished even if + // callback for stream end isn't called + // So we cannot use data hashtable and have to add it to vector at last. 
+ mCallbacks.AppendElement(callback); + + return NS_OK; +} + +NS_IMETHODIMP +SapiService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + return NS_OK; +} + +SapiService* SapiService::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT(false, "SapiService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton) { + RefPtr<SapiService> service = new SapiService(); + if (service->Init()) { + sSingleton = service; + ClearOnShutdown(&sSingleton); + } + } + return sSingleton; +} + +already_AddRefed<SapiService> SapiService::GetInstanceForService() { + RefPtr<SapiService> sapiService = GetInstance(); + return sapiService.forget(); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/windows/SapiService.h b/dom/media/webspeech/synth/windows/SapiService.h new file mode 100644 index 0000000000..79cc20917b --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.h @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SapiService_h +#define mozilla_dom_SapiService_h + +#include "nsISpeechService.h" +#include "nsIObserver.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" +#include "mozilla/StaticPtr.h" + +#include <windows.h> +#include <sapi.h> + +class nsISynthVoiceRegistry; + +namespace mozilla::dom { + +class SapiCallback; + +class SapiService final : public nsISpeechService, public nsIObserver { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + NS_DECL_NSIOBSERVER + + SapiService(); + bool Init(); + + static SapiService* GetInstance(); + static already_AddRefed<SapiService> GetInstanceForService(); + + static void __stdcall SpeechEventCallback(WPARAM aWParam, LPARAM aLParam); + + private: + virtual ~SapiService(); + + already_AddRefed<ISpVoice> InitSapiInstance(); + bool RegisterVoices(); + bool RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry, + const WCHAR* categoryId); + + nsRefPtrHashtable<nsStringHashKey, ISpObjectToken> mVoices; + nsTArray<RefPtr<SapiCallback>> mCallbacks; + + bool mInitialized; + + static StaticRefPtr<SapiService> sSingleton; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/windows/components.conf b/dom/media/webspeech/synth/windows/components.conf new file mode 100644 index 0000000000..bc9b83a43a --- /dev/null +++ b/dom/media/webspeech/synth/windows/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +Classes = [ + { + 'cid': '{21b4a45b-9806-4021-a706-d768ab0548f9}', + 'contract_ids': ['@mozilla.org/synthsapi;1'], + 'singleton': True, + 'type': 'mozilla::dom::SapiService', + 'headers': ['/dom/media/webspeech/synth/windows/SapiService.h'], + 'constructor': 'mozilla::dom::SapiService::GetInstanceForService', + 'categories': {"speech-synth-started": 'Sapi Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/windows/moz.build b/dom/media/webspeech/synth/windows/moz.build new file mode 100644 index 0000000000..90bafe9ca7 --- /dev/null +++ b/dom/media/webspeech/synth/windows/moz.build @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "SapiService.cpp", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" |