summaryrefslogtreecommitdiffstats
path: root/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm')
-rw-r--r--dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm431
1 files changed, 431 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
new file mode 100644
index 0000000000..a815c68644
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@@ -0,0 +1,431 @@
+/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsServiceManagerUtils.h"
+#include "nsObjCExceptions.h"
+#include "nsCocoaUtils.h"
+#include "nsIThread.h"
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Assertions.h"
+#include "OSXSpeechSynthesizerService.h"
+
+#import <Cocoa/Cocoa.h>
+
+@class SpeechDelegate;
+
+// We can escape the default delimiters ("[[" and "]]") by temporarily
+// changing the delimiters just before they appear, and changing them back
+// just after.
+#define DLIM_ESCAPE_START "[[dlim (( ))]]"
+#define DLIM_ESCAPE_END "((dlim [[ ]]))"
+
+using namespace mozilla;
+
+// Connects one nsISpeechTask to one NSSpeechSynthesizer. It implements
+// nsISpeechTaskCallback (cancel / pause / resume / volume) on top of the
+// synthesizer, and exposes the On* methods that SpeechDelegate calls to
+// forward synthesizer events back to the task.
+class SpeechTaskCallback final : public nsISpeechTaskCallback {
+ public:
+ // aSynth is released by the destructor, so this object takes over the
+ // caller's reference to it. aOffsets is cloned into mOffsets.
+ SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
+ const nsTArray<size_t>& aOffsets);
+
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback)
+
+ NS_DECL_NSISPEECHTASKCALLBACK
+
+ // Entry points invoked by SpeechDelegate when the synthesizer reports a word
+ // boundary, an error, or the end of the utterance.
+ void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);
+ void OnError(uint32_t aIndex);
+ void OnDidFinishSpeaking();
+
+ private:
+ virtual ~SpeechTaskCallback();
+
+ // Seconds elapsed since mStartingTime.
+ float GetTimeDurationFromStart();
+
+ // The task to notify; reset to null once speaking has finished.
+ nsCOMPtr<nsISpeechTask> mTask;
+ // Released in the destructor.
+ NSSpeechSynthesizer* mSpeechSynthesizer;
+ // Allocated in the constructor, released in the destructor.
+ SpeechDelegate* mDelegate;
+ // Set when this object is constructed.
+ TimeStamp mStartingTime;
+ // Last index reported to the task, taken from mOffsets at a word boundary.
+ uint32_t mCurrentIndex;
+ // Indexed by the position the synthesizer reports; yields the value passed
+ // on to the task as the character index.
+ nsTArray<size_t> mOffsets;
+};
+
+// NSSpeechSynthesizerDelegate that relays synthesizer events to a C++
+// SpeechTaskCallback.
+@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
+ @private
+ // Plain pointer to the callback that receives the forwarded events; the
+ // initializer only stores it.
+ SpeechTaskCallback* mCallback;
+}
+
+// Creates a delegate that forwards every event to aCallback.
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
+@end
+
+@implementation SpeechDelegate
+// Stores aCallback (no reference is taken on it) and returns the initialized
+// delegate, or nil if the superclass initializer failed.
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
+  // -init may return nil or a different object, so its result must be
+  // assigned back to self and checked before any ivar is written.
+  self = [super init];
+  if (self) {
+    mCallback = aCallback;
+  }
+  return self;
+}
+
+// Word-boundary notification: forwards the location and length of the word
+// about to be spoken.
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+            willSpeakWord:(NSRange)aRange
+                 ofString:(NSString*)aString {
+  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
+}
+
+// End-of-speech notification. aFinishedSpeaking is not inspected; the
+// callback is told that speaking ended either way.
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender didFinishSpeaking:(BOOL)aFinishedSpeaking {
+  mCallback->OnDidFinishSpeaking();
+}
+
+// Error notification: only the character index is forwarded, the message is
+// dropped.
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+ didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
+                 ofString:(NSString*)aString
+                  message:(NSString*)aMessage {
+  mCallback->OnError(aCharacterIndex);
+}
+@end
+
+// Cycle-collection participant for SpeechTaskCallback; mTask is the only
+// member listed.
+NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);
+
+// Interfaces answered by QueryInterface: nsISpeechTaskCallback, plus
+// nsISupports resolved through nsISpeechTaskCallback.
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
+ NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+// Cycle-collecting AddRef / Release implementations.
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)
+
+// Builds the callback for one utterance: keeps aTask and aSynth, clones
+// aOffsets, records the construction time as the start of the utterance, and
+// installs a newly allocated SpeechDelegate (pointing back at this object) on
+// the synthesizer.
+SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
+                                       const nsTArray<size_t>& aOffsets)
+    : mTask(aTask),
+      mSpeechSynthesizer(aSynth),
+      mDelegate([[SpeechDelegate alloc] initWithCallback:this]),
+      mStartingTime(TimeStamp::Now()),
+      mCurrentIndex(0),
+      mOffsets(aOffsets.Clone()) {
+  [mSpeechSynthesizer setDelegate:mDelegate];
+}
+
+// Tears down the Objective-C side. The delegate is detached from the
+// synthesizer before it is released, then the synthesizer reference held by
+// this object is released as well.
+SpeechTaskCallback::~SpeechTaskCallback() {
+ [mSpeechSynthesizer setDelegate:nil];
+ [mDelegate release];
+ [mSpeechSynthesizer release];
+}
+
+// nsISpeechTaskCallback: stops the synthesizer. Returns NS_OK, or
+// NS_ERROR_FAILURE via the Objective-C try-block macros.
+NS_IMETHODIMP
+SpeechTaskCallback::OnCancel() {
+ NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+ [mSpeechSynthesizer stopSpeaking];
+ return NS_OK;
+
+ NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// nsISpeechTaskCallback: pauses the synthesizer immediately and, if a task is
+// still attached, reports the pause to it together with the elapsed time and
+// the current character index. Returns NS_ERROR_FAILURE when no task is
+// attached any more.
+NS_IMETHODIMP
+SpeechTaskCallback::OnPause() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  // The synthesizer is paused whether or not a task is still attached.
+  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+
+  if (mTask) {
+    mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
+    return NS_OK;
+  }
+
+  // When pause() is called in a child process, that process may not have
+  // received the end event from the chrome process yet.
+  return NS_ERROR_FAILURE;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// nsISpeechTaskCallback: resumes the synthesizer and, if a task is still
+// attached, reports the resume to it together with the elapsed time and the
+// current character index. Returns NS_ERROR_FAILURE when no task is attached
+// any more.
+NS_IMETHODIMP
+SpeechTaskCallback::OnResume() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  // The synthesizer is resumed whether or not a task is still attached.
+  [mSpeechSynthesizer continueSpeaking];
+
+  if (mTask) {
+    mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
+    return NS_OK;
+  }
+
+  // When resume() is called in a child process, that process may not have
+  // received the end event from the chrome process yet.
+  return NS_ERROR_FAILURE;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// nsISpeechTaskCallback: applies aVolume to the synthesizer's volume
+// property. Any error reported by the property setter is ignored.
+NS_IMETHODIMP
+SpeechTaskCallback::OnVolumeChanged(float aVolume) {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  NSNumber* boxedVolume = @(aVolume);
+  [mSpeechSynthesizer setObject:boxedVolume forProperty:NSSpeechVolumeProperty error:nil];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// Returns the number of seconds between mStartingTime and now.
+float SpeechTaskCallback::GetTimeDurationFromStart() {
+  return (TimeStamp::Now() - mStartingTime).ToSeconds();
+}
+
+// Called when the synthesizer is about to speak a word. aIndex is looked up
+// in mOffsets to update mCurrentIndex (an out-of-range aIndex leaves it
+// unchanged); a "word" boundary is then dispatched to the task, if any.
+void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
+  if (aIndex < mOffsets.Length()) {
+    mCurrentIndex = mOffsets[aIndex];
+  }
+
+  if (mTask) {
+    mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(), mCurrentIndex, aLength, 1);
+  }
+}
+
+// Called when the synthesizer reports an error at aIndex. Forwards it to the
+// task, if one is still attached, together with the elapsed time.
+// NOTE(review): aIndex is passed through unchanged, whereas OnWillSpeakWord
+// translates its index through mOffsets -- confirm this is intended.
+void SpeechTaskCallback::OnError(uint32_t aIndex) {
+  if (mTask) {
+    mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
+  }
+}
+
+// Called when the synthesizer has stopped speaking. Dispatches the end event
+// to the task (if one is still attached), then detaches the delegate and
+// drops the task so no further events are delivered.
+void SpeechTaskCallback::OnDidFinishSpeaking() {
+  // Every other callback in this class tolerates a null mTask; do the same
+  // here instead of dereferencing it unconditionally.
+  // NOTE(review): presumably mTask can already be cleared (it is the member
+  // handed to the cycle collector) when this notification arrives -- confirm.
+  if (mTask) {
+    mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
+  }
+  // no longer needed
+  [mSpeechSynthesizer setDelegate:nil];
+  mTask = nullptr;
+}
+
+namespace mozilla {
+namespace dom {
+
+// Description of one voice as collected by EnumVoicesRunnable and consumed by
+// RegisterVoicesRunnable.
+struct OSXVoice {
+  // "urn:moz-tts:osx:" followed by the voice identifier.
+  nsString mUri;
+  // Voice name.
+  nsString mName;
+  // Locale identifier with '_' replaced by '-'.
+  nsString mLocale;
+  // True only for the voice equal to the system default voice.
+  bool mIsDefault = false;
+};
+
+// Runnable that adds a list of OSXVoice entries to the synth voice registry
+// on behalf of aSpeechService.
+class RegisterVoicesRunnable final : public Runnable {
+ public:
+ // Neither argument is copied or reference-counted: the service is kept as a
+ // raw pointer and the list as a reference to the caller's array.
+ RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService, nsTArray<OSXVoice>& aList)
+ : Runnable("RegisterVoicesRunnable"), mSpeechService(aSpeechService), mVoices(aList) {}
+
+ NS_IMETHOD Run() override;
+
+ private:
+ ~RegisterVoicesRunnable() override = default;
+
+ // This runnable is always run synchronously, so it is unnecessary to hold a
+ // strong reference to the service or to copy the voice list.
+ OSXSpeechSynthesizerService* mSpeechService;
+ nsTArray<OSXVoice>& mVoices;
+};
+
+// Registers every voice in mVoices with the synth voice registry, marks the
+// default one, and finally notifies the registry that the voice list changed.
+// Returns the service-lookup error if the registry is unavailable, NS_OK
+// otherwise; a voice that fails to register is skipped with a warning.
+NS_IMETHODIMP
+RegisterVoicesRunnable::Run() {
+  nsresult rv;
+  nsCOMPtr<nsISynthVoiceRegistry> registry = do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
+  if (!registry) {
+    return rv;
+  }
+
+  // Iterate by const reference: each OSXVoice holds three strings, and
+  // nothing here needs a private copy of them.
+  for (const OSXVoice& voice : mVoices) {
+    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName, voice.mLocale, true, false);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      continue;
+    }
+
+    if (voice.mIsDefault) {
+      registry->SetDefaultVoice(voice.mUri, true);
+    }
+  }
+
+  registry->NotifyVoicesChanged();
+
+  return NS_OK;
+}
+
+// Runnable that enumerates the system voices and hands them to a
+// RegisterVoicesRunnable for registration.
+class EnumVoicesRunnable final : public Runnable {
+ public:
+ explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
+ : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}
+
+ NS_IMETHOD Run() override;
+
+ private:
+ ~EnumVoicesRunnable() override = default;
+
+ // Strong reference to the service the voices are registered for.
+ RefPtr<OSXSpeechSynthesizerService> mSpeechService;
+};
+
+// Collects an OSXVoice record for every voice NSSpeechSynthesizer reports,
+// then dispatches a RegisterVoicesRunnable to the main thread and waits for
+// it to complete. Returns NS_OK, or NS_ERROR_FAILURE via the Objective-C
+// try-block macros.
+NS_IMETHODIMP
+EnumVoicesRunnable::Run() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  AutoTArray<OSXVoice, 64> collected;
+
+  NSArray* installedVoices = [NSSpeechSynthesizer availableVoices];
+  NSString* systemDefault = [NSSpeechSynthesizer defaultVoice];
+
+  for (NSString* voiceId in installedVoices) {
+    NSDictionary* attributes = [NSSpeechSynthesizer attributesForVoice:voiceId];
+    OSXVoice entry;
+
+    // The voice URI is a fixed prefix followed by the system identifier.
+    nsAutoString systemIdentifier;
+    nsCocoaUtils::GetStringForNSString([attributes objectForKey:NSVoiceIdentifier],
+                                       systemIdentifier);
+    entry.mUri.AssignLiteral("urn:moz-tts:osx:");
+    entry.mUri.Append(systemIdentifier);
+
+    nsCocoaUtils::GetStringForNSString([attributes objectForKey:NSVoiceName], entry.mName);
+
+    // Locale identifiers use '_' as separator; store them with '-' instead.
+    nsCocoaUtils::GetStringForNSString([attributes objectForKey:NSVoiceLocaleIdentifier],
+                                       entry.mLocale);
+    entry.mLocale.ReplaceChar('_', '-');
+
+    entry.mIsDefault = [voiceId isEqualToString:systemDefault];
+
+    collected.AppendElement(entry);
+  }
+
+  // The registration runnable only references `collected`, so this call has
+  // to wait for it to finish before the array goes out of scope.
+  RefPtr<RegisterVoicesRunnable> registerTask =
+      new RegisterVoicesRunnable(mSpeechService, collected);
+  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
+                                           GetMainThreadSerialEventTarget(),
+                                           registerTask.forget());
+
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// Storage for the process-wide instance handed out by GetInstance().
+StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton;
+
+// Interfaces answered by QueryInterface: nsISpeechService, nsIObserver, plus
+// nsISupports resolved through nsISpeechService.
+NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
+ NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+ NS_INTERFACE_MAP_ENTRY(nsIObserver)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
+NS_IMPL_RELEASE(OSXSpeechSynthesizerService)
+
+// The service starts out uninitialized; Init() sets mInitialized.
+OSXSpeechSynthesizerService::OSXSpeechSynthesizerService() : mInitialized(false) {}
+
+// Starts voice enumeration on a dedicated "SpeechWorker" thread.
+// Returns false, leaving mInitialized untouched, when the backend is
+// disabled by preference, when the test preference is set, when the worker
+// thread cannot be created, or when the enumeration runnable cannot be
+// queued. Returns true once the runnable has been dispatched.
+bool OSXSpeechSynthesizerService::Init() {
+  if (Preferences::GetBool("media.webspeech.synth.test") ||
+      !StaticPrefs::media_webspeech_synth_enabled()) {
+    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
+    return false;
+  }
+
+  nsCOMPtr<nsIThread> thread;
+  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
+    return false;
+  }
+
+  // Get all the voices and register in the SynthVoiceRegistry
+  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
+  // Do not report the service as initialized if enumeration was never
+  // queued: no voice would ever be registered for it.
+  if (NS_WARN_IF(NS_FAILED(thread->Dispatch(runnable, NS_DISPATCH_NORMAL)))) {
+    return false;
+  }
+
+  mInitialized = true;
+  return true;
+}
+
+// nsISpeechService: speaks aText with the voice named by aUri.
+//
+// aUri    voice URI; must start with "urn:moz-tts:osx:" (asserted), the
+//         remainder is used as the voice identifier.
+// aVolume passed to the synthesizer's volume property unchanged.
+// aRate   multiplied by 200 to obtain the synthesizer's rate property.
+// aPitch  scales the synthesizer's default base pitch by (aPitch / 2 + 0.5).
+// aTask   task that is set up with a new SpeechTaskCallback and told when
+//         speaking starts.
+//
+// Returns the failure from aTask->Setup(), NS_ERROR_FAILURE if the
+// synthesizer refuses to start or an Objective-C exception is caught, and
+// NS_OK otherwise.
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume,
+                                   float aRate, float aPitch, nsISpeechTask* aTask) {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
+             "OSXSpeechSynthesizerService doesn't allow this voice URI");
+
+  // This reference is handed to the SpeechTaskCallback created below, whose
+  // destructor releases it.
+  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
+  // strlen("urn:moz-tts:osx:") == 16
+  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
+  [synth setVoice:identifier];
+
+  // default rate is 180-220
+  [synth setObject:[NSNumber numberWithInt:aRate * 200] forProperty:NSSpeechRateProperty error:nil];
+  // volume allows 0.0-1.0
+  [synth setObject:[NSNumber numberWithFloat:aVolume] forProperty:NSSpeechVolumeProperty error:nil];
+  // Use default pitch value to calculate this
+  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty error:nil];
+  if (defaultPitch) {
+    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
+    [synth setObject:[NSNumber numberWithInt:newPitch]
+         forProperty:NSSpeechPitchBaseProperty
+               error:nil];
+  }
+
+  nsAutoString escapedText;
+  // We need to map the offsets from the given text to the escaped text.
+  // The index of the offsets array is the position in the escaped text,
+  // the element value is the position in the user-supplied text.
+  nsTArray<size_t> offsets;
+  offsets.SetCapacity(aText.Length());
+
+  // Appends aCount entries that all map to aSourceIndex. It is used for the
+  // inserted delimiter commands: growing the array with AppendElements(count)
+  // alone would leave those entries without an assigned value (nsTArray does
+  // not zero-fill plain integer elements), and any lookup that landed on one
+  // would yield garbage.
+  auto appendMappedOffsets = [&offsets](size_t aCount, size_t aSourceIndex) {
+    for (size_t n = 0; n < aCount; n++) {
+      offsets.AppendElement(aSourceIndex);
+    }
+  };
+
+  // This loop looks for occurrences of "[[" or "]]", escapes them, and
+  // populates the offsets array to supply a map to the original offsets.
+  for (size_t i = 0; i < aText.Length(); i++) {
+    const bool startsDelimiter =
+        aText.Length() > i + 1 &&
+        ((aText[i] == ']' && aText[i + 1] == ']') || (aText[i] == '[' && aText[i + 1] == '['));
+
+    if (startsDelimiter) {
+      // The opening command maps to the first bracket of the pair ...
+      escapedText.AppendLiteral(DLIM_ESCAPE_START);
+      appendMappedOffsets(strlen(DLIM_ESCAPE_START), i);
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+      escapedText.Append(aText[++i]);
+      offsets.AppendElement(i);
+      // ... and the closing command to the second one (i was advanced above).
+      escapedText.AppendLiteral(DLIM_ESCAPE_END);
+      appendMappedOffsets(strlen(DLIM_ESCAPE_END), i);
+    } else {
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+    }
+  }
+
+  RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
+  nsresult rv = aTask->Setup(callback);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  NSString* text = nsCocoaUtils::ToNSString(escapedText);
+  BOOL success = [synth startSpeakingString:text];
+  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
+
+  aTask->DispatchStart();
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+// nsIObserver: every notification is ignored and NS_OK is returned.
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
+ const char16_t* aData) {
+ return NS_OK;
+}
+
+// Returns the singleton, creating and initializing it on first use. Must be
+// called on the main thread (asserted). Returns nullptr outside the default
+// process type and whenever Init() fails; in the latter case nothing is
+// cached, so a later call tries again.
+OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (XRE_GetProcessType() != GeckoProcessType_Default) {
+    return nullptr;
+  }
+
+  if (sSingleton) {
+    return sSingleton;
+  }
+
+  RefPtr<OSXSpeechSynthesizerService> candidate = new OSXSpeechSynthesizerService();
+  if (!candidate->Init()) {
+    return nullptr;
+  }
+
+  sSingleton = candidate;
+  ClearOnShutdown(&sSingleton);
+  return sSingleton;
+}
+
+// Same as GetInstance(), but hands the caller its own reference (or null).
+already_AddRefed<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::GetInstanceForService() {
+  return RefPtr<OSXSpeechSynthesizerService>(GetInstance()).forget();
+}
+
+} // namespace dom
+} // namespace mozilla