/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.h"
#include "nsServiceManagerUtils.h"
#include "nsObjCExceptions.h"
#include "nsCocoaUtils.h"
#include "nsIThread.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/dom/nsSynthVoiceRegistry.h"
#include "mozilla/dom/nsSpeechTask.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_media.h"
#include "mozilla/Assertions.h"
#include "OSXSpeechSynthesizerService.h"

// NOTE(review): the import target was missing from the reviewed text;
// NSSpeechSynthesizer is an AppKit class, so the Cocoa umbrella header is
// assumed here -- confirm.
#import <Cocoa/Cocoa.h>

@class SpeechDelegate;

// We can escape the default delimiters ("[[" and "]]") by temporarily
// changing the delimiters just before they appear, and changing them back
// just after.
#define DLIM_ESCAPE_START "[[dlim (( ))]]"
#define DLIM_ESCAPE_END "((dlim [[ ]]))"

using namespace mozilla;

// Bridges one NSSpeechSynthesizer utterance to an nsISpeechTask: it forwards
// pause/resume/cancel/volume requests to the synthesizer and forwards the
// synthesizer's delegate notifications (word boundary, error, end) to the
// task.
class SpeechTaskCallback final : public nsISpeechTaskCallback {
 public:
  // aSynth is released in the destructor, so the callback takes over the
  // caller's reference. aOffsets is cloned; its index is a position in the
  // escaped text handed to the synthesizer and its value is the matching
  // position in the user-supplied text.
  SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
                     const nsTArray<size_t>& aOffsets);

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback)

  NS_DECL_NSISPEECHTASKCALLBACK

  // Entry points invoked by SpeechDelegate.
  void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);
  void OnError(uint32_t aIndex);
  void OnDidFinishSpeaking();

 private:
  virtual ~SpeechTaskCallback();

  // Seconds elapsed since the callback was constructed.
  float GetTimeDurationFromStart();

  // Cleared once speaking has finished; every user must null-check it.
  nsCOMPtr<nsISpeechTask> mTask;
  NSSpeechSynthesizer* mSpeechSynthesizer;
  SpeechDelegate* mDelegate;
  TimeStamp mStartingTime;
  // Position in the user-supplied text of the most recently reported word.
  uint32_t mCurrentIndex;
  nsTArray<size_t> mOffsets;
};

// Objective-C delegate that relays NSSpeechSynthesizer notifications to the
// C++ SpeechTaskCallback that created it.
@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  // Raw, non-retained pointer. SpeechTaskCallback detaches this delegate from
  // the synthesizer and releases it in its own destructor.
  SpeechTaskCallback* mCallback;
}

- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
@end

@implementation SpeechDelegate
- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
  // -init is allowed to return a different object (or nil), so its result
  // must be assigned back to self before any ivar is touched.
  self = [super init];
  if (self) {
    mCallback = aCallback;
  }
  return self;
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
            willSpeakWord:(NSRange)aRange
                 ofString:(NSString*)aString {
  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
        didFinishSpeaking:(BOOL)aFinishedSpeaking {
  mCallback->OnDidFinishSpeaking();
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
    didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
                    ofString:(NSString*)aString
                     message:(NSString*)aMessage {
  mCallback->OnError(aCharacterIndex);
}
@end

NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)

SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
                                       const nsTArray<size_t>& aOffsets)
    : mTask(aTask), mSpeechSynthesizer(aSynth), mCurrentIndex(0), mOffsets(aOffsets.Clone()) {
  mDelegate = [[SpeechDelegate alloc] initWithCallback:this];
  [mSpeechSynthesizer setDelegate:mDelegate];
  mStartingTime = TimeStamp::Now();
}

SpeechTaskCallback::~SpeechTaskCallback() {
  // Detach the delegate first so the synthesizer can no longer call back into
  // this object through it.
  [mSpeechSynthesizer setDelegate:nil];
  [mDelegate release];
  [mSpeechSynthesizer release];
}

NS_IMETHODIMP
SpeechTaskCallback::OnCancel() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer stopSpeaking];
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

NS_IMETHODIMP
SpeechTaskCallback::OnPause() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
  if (!mTask) {
    // When calling pause() on the child process, it may not have received the
    // end event from the chrome process yet.
    return NS_ERROR_FAILURE;
  }

  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

NS_IMETHODIMP
SpeechTaskCallback::OnResume() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer continueSpeaking];
  if (!mTask) {
    // When calling resume() on the child process, it may not have received the
    // end event from the chrome process yet.
    return NS_ERROR_FAILURE;
  }

  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

NS_IMETHODIMP
SpeechTaskCallback::OnVolumeChanged(float aVolume) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume]
                    forProperty:NSSpeechVolumeProperty
                          error:nil];
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

float SpeechTaskCallback::GetTimeDurationFromStart() {
  TimeDuration duration = TimeStamp::Now() - mStartingTime;
  return duration.ToSeconds();
}

void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
  // aIndex is a position in the escaped text; translate it back to the
  // user-supplied text. An out-of-range index keeps the previous position.
  mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex;
  if (!mTask) {
    return;
  }
  mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(), mCurrentIndex, aLength, 1);
}

void SpeechTaskCallback::OnError(uint32_t aIndex) {
  if (!mTask) {
    return;
  }
  mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
}

void SpeechTaskCallback::OnDidFinishSpeaking() {
  // mTask is cleared below, so guard against being notified a second time,
  // matching the null checks in the other notification handlers.
  if (mTask) {
    mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
  }
  // no longer needed
  [mSpeechSynthesizer setDelegate:nil];
  mTask = nullptr;
}

namespace mozilla {
namespace dom {

// Plain description of one system voice, gathered by EnumVoicesRunnable and
// consumed by RegisterVoicesRunnable.
struct OSXVoice {
  OSXVoice() : mIsDefault(false) {}

  nsString mUri;
  nsString mName;
  nsString mLocale;
  bool mIsDefault;
};

// Adds a list of enumerated voices to the synth voice registry.
// EnumVoicesRunnable dispatches it to the main thread.
class RegisterVoicesRunnable final : public Runnable {
 public:
  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService, nsTArray<OSXVoice>& aList)
      : Runnable("RegisterVoicesRunnable"), mSpeechService(aSpeechService), mVoices(aList) {}

  NS_IMETHOD Run() override;

 private:
  ~RegisterVoicesRunnable() override = default;

  // This runnable is always dispatched synchronously (the dispatcher waits
  // for it to complete), so it is unnecessary to hold a strong reference to
  // the service or to copy the voice list.
  OSXSpeechSynthesizerService* mSpeechService;
  nsTArray<OSXVoice>& mVoices;
};

NS_IMETHODIMP
RegisterVoicesRunnable::Run() {
  nsresult rv;
  nsCOMPtr<nsISynthVoiceRegistry> registry = do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
  if (!registry) {
    return rv;
  }

  // Iterate by const reference: each OSXVoice carries three strings and
  // nothing here modifies them.
  for (const OSXVoice& voice : mVoices) {
    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName, voice.mLocale, true, false);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      // A voice that fails to register is skipped; the rest are still added.
      continue;
    }

    if (voice.mIsDefault) {
      registry->SetDefaultVoice(voice.mUri, true);
    }
  }

  registry->NotifyVoicesChanged();

  return NS_OK;
}

// Enumerates the voices NSSpeechSynthesizer reports and hands them to a
// RegisterVoicesRunnable on the main thread. Init() dispatches this to the
// "SpeechWorker" thread.
class EnumVoicesRunnable final : public Runnable {
 public:
  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
      : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}

  NS_IMETHOD Run() override;

 private:
  ~EnumVoicesRunnable() override = default;

  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
};

NS_IMETHODIMP
EnumVoicesRunnable::Run() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  // NOTE(review): the template arguments were missing from the reviewed text;
  // the element type follows from the AppendElement(item) call below, the
  // inline capacity of 64 is an assumption -- confirm.
  AutoTArray<OSXVoice, 64> list;

  NSArray* voices = [NSSpeechSynthesizer availableVoices];
  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];

  for (NSString* voice in voices) {
    OSXVoice item;

    NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];

    nsAutoString identifier;
    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier], identifier);

    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName], item.mName);

    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale);
    // Turn "en_US"-style identifiers into "en-US"-style ones.
    item.mLocale.ReplaceChar('_', '-');

    // Speak() strips this prefix again to recover the voice identifier.
    item.mUri.AssignLiteral("urn:moz-tts:osx:");
    item.mUri.Append(identifier);

    if ([voice isEqualToString:defaultVoice]) {
      item.mIsDefault = true;
    }

    list.AppendElement(item);
  }

  // `list` is passed by reference, so wait for the registration to complete
  // before it goes out of scope.
  RefPtr<RegisterVoicesRunnable> runnable = new RegisterVoicesRunnable(mSpeechService, list);
  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
                                           GetMainThreadSerialEventTarget(), runnable.forget());

  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton;

NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
  NS_INTERFACE_MAP_ENTRY(nsIObserver)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
NS_INTERFACE_MAP_END

NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
NS_IMPL_RELEASE(OSXSpeechSynthesizerService)

OSXSpeechSynthesizerService::OSXSpeechSynthesizerService() : mInitialized(false) {}

// Starts voice enumeration on a dedicated "SpeechWorker" thread.
// Returns false when the backend is disabled by preference or the thread
// cannot be created; returns true once enumeration has been dispatched.
bool OSXSpeechSynthesizerService::Init() {
  if (Preferences::GetBool("media.webspeech.synth.test") ||
      !StaticPrefs::media_webspeech_synth_enabled()) {
    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
    return false;
  }

  nsCOMPtr<nsIThread> thread;
  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
    return false;
  }

  // Get all the voices and register in the SynthVoiceRegistry
  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
  thread->Dispatch(runnable, NS_DISPATCH_NORMAL);

  mInitialized = true;
  return true;
}

// Appends aCount copies of aValue to aOffsets.
static void AppendRepeatedOffset(nsTArray<size_t>& aOffsets, size_t aValue, size_t aCount) {
  for (size_t n = 0; n < aCount; ++n) {
    aOffsets.AppendElement(aValue);
  }
}

// Speaks aText with the voice named by aUri ("urn:moz-tts:osx:" followed by
// the voice identifier), applying aVolume, aRate and aPitch, and reports
// progress to aTask through a SpeechTaskCallback.
// Returns the failure code of aTask->Setup() if it fails, NS_ERROR_FAILURE if
// the synthesizer refuses to start or an Objective-C exception is raised, and
// NS_OK otherwise.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume,
                                   float aRate, float aPitch, nsISpeechTask* aTask) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
             "OSXSpeechSynthesizerService doesn't allow this voice URI");

  // This reference is handed to the SpeechTaskCallback created below, which
  // releases it in its destructor.
  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
  // strlen("urn:moz-tts:osx:") == 16
  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
  [synth setVoice:identifier];

  // default rate is 180-220
  [synth setObject:[NSNumber numberWithInt:aRate * 200] forProperty:NSSpeechRateProperty error:nil];
  // volume allows 0.0-1.0
  [synth setObject:[NSNumber numberWithFloat:aVolume] forProperty:NSSpeechVolumeProperty error:nil];
  // Use default pitch value to calculate this
  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty error:nil];
  if (defaultPitch) {
    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
    [synth setObject:[NSNumber numberWithInt:newPitch]
         forProperty:NSSpeechPitchBaseProperty
               error:nil];
  }

  nsAutoString escapedText;
  // We need to map the offsets from the given text to the escaped text.
  // The index of the offsets array is the position in the escaped text,
  // the element value is the position in the user-supplied text.
  nsTArray<size_t> offsets;
  offsets.SetCapacity(aText.Length());

  // This loop looks for occurrences of "[[" or "]]", escapes them, and
  // populates the offsets array to supply a map to the original offsets.
  for (size_t i = 0; i < aText.Length(); i++) {
    if (aText.Length() > i + 1 && ((aText[i] == ']' && aText[i + 1] == ']') ||
                                   (aText[i] == '[' && aText[i + 1] == '['))) {
      // The inserted delimiter text has no counterpart in the user-supplied
      // text, so map every one of its positions to the adjacent bracket.
      // NOTE(review): nsTArray::AppendElements(count) is understood to
      // default-construct its elements, which would leave these size_t slots
      // indeterminate -- hence the explicit fill; confirm.
      escapedText.AppendLiteral(DLIM_ESCAPE_START);
      AppendRepeatedOffset(offsets, i, strlen(DLIM_ESCAPE_START));
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
      escapedText.Append(aText[++i]);
      offsets.AppendElement(i);
      escapedText.AppendLiteral(DLIM_ESCAPE_END);
      AppendRepeatedOffset(offsets, i, strlen(DLIM_ESCAPE_END));
    } else {
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
    }
  }

  RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
  nsresult rv = aTask->Setup(callback);
  NS_ENSURE_SUCCESS(rv, rv);

  NSString* text = nsCocoaUtils::ToNSString(escapedText);
  BOOL success = [synth startSpeakingString:text];
  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);

  aTask->DispatchStart();
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

// nsIObserver implementation; no notification is acted upon.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                     const char16_t* aData) {
  return NS_OK;
}

// Returns the lazily created singleton, or nullptr when called outside the
// default process type or when Init() fails. Main thread only.
OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
  MOZ_ASSERT(NS_IsMainThread());
  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    return nullptr;
  }

  if (!sSingleton) {
    RefPtr<OSXSpeechSynthesizerService> speechService = new OSXSpeechSynthesizerService();
    if (speechService->Init()) {
      sSingleton = speechService;
      ClearOnShutdown(&sSingleton);
    }
  }
  return sSingleton;
}

// Same as GetInstance(), but returns an owning reference.
already_AddRefed<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::GetInstanceForService() {
  RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance();
  return speechService.forget();
}

}  // namespace dom
}  // namespace mozilla