1 files changed, 431 insertions, 0 deletions
diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
new file mode 100644
index 0000000000..a815c68644
--- /dev/null
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@@ -0,0 +1,431 @@
+/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsServiceManagerUtils.h"
+#include "nsObjCExceptions.h"
+#include "nsCocoaUtils.h"
+#include "nsIThread.h"
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Assertions.h"
+#include "OSXSpeechSynthesizerService.h"
+
+#import <Cocoa/Cocoa.h>
+
+@class SpeechDelegate;
+
+// We can escape the default delimiters ("[[" and "]]") by temporarily
+// changing the delimiters just before they appear, and changing them back
+// just after.
+#define DLIM_ESCAPE_START "[[dlim (( ))]]"
+#define DLIM_ESCAPE_END "((dlim [[ ]]))"
+
+using namespace mozilla;
+
+class SpeechTaskCallback final : public nsISpeechTaskCallback {
+ public:
+  SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
+                     const nsTArray<size_t>& aOffsets);
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback)
+
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);
+  void OnError(uint32_t aIndex);
+  void OnDidFinishSpeaking();
+
+ private:
+  virtual ~SpeechTaskCallback();
+
+  float GetTimeDurationFromStart();
+
+  nsCOMPtr<nsISpeechTask> mTask;
+  NSSpeechSynthesizer* mSpeechSynthesizer;
+  SpeechDelegate* mDelegate;
+  TimeStamp mStartingTime;
+  uint32_t mCurrentIndex;
+  nsTArray<size_t> mOffsets;
+};
+
+@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
+ @private
+  SpeechTaskCallback* mCallback;
+}
+
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
+@end
+
+@implementation SpeechDelegate
+- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
+  [super init];
+  mCallback = aCallback;
+  return self;
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+            willSpeakWord:(NSRange)aRange
+                 ofString:(NSString*)aString {
+  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender didFinishSpeaking:(BOOL)aFinishedSpeaking {
+  mCallback->OnDidFinishSpeaking();
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
+    didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
+                    ofString:(NSString*)aString
+                     message:(NSString*)aMessage {
+  mCallback->OnError(aCharacterIndex);
+}
+@end
+
+NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)
+
+SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
+                                       const nsTArray<size_t>& aOffsets)
+    : mTask(aTask), mSpeechSynthesizer(aSynth), mCurrentIndex(0), mOffsets(aOffsets.Clone()) {
+  mDelegate = [[SpeechDelegate alloc] initWithCallback:this];
+  [mSpeechSynthesizer setDelegate:mDelegate];
+  mStartingTime = TimeStamp::Now();
+}
+
+SpeechTaskCallback::~SpeechTaskCallback() {
+  [mSpeechSynthesizer setDelegate:nil];
+  [mDelegate release];
+  [mSpeechSynthesizer release];
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnCancel() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer stopSpeaking];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnPause() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+  if (!mTask) {
+    // When calling pause() on child porcess, it may not receive end event
+    // from chrome process yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnResume() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer continueSpeaking];
+  if (!mTask) {
+    // When calling resume() on child porcess, it may not receive end event
+    // from chrome process yet.
+    return NS_ERROR_FAILURE;
+  }
+  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+NS_IMETHODIMP
+SpeechTaskCallback::OnVolumeChanged(float aVolume) {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume]
+                    forProperty:NSSpeechVolumeProperty
+                          error:nil];
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+float SpeechTaskCallback::GetTimeDurationFromStart() {
+  TimeDuration duration = TimeStamp::Now() - mStartingTime;
+  return duration.ToSeconds();
+}
+
+void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
+  mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex;
+  if (!mTask) {
+    return;
+  }
+  mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(), mCurrentIndex, aLength, 1);
+}
+
+void SpeechTaskCallback::OnError(uint32_t aIndex) {
+  if (!mTask) {
+    return;
+  }
+  mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
+}
+
+void SpeechTaskCallback::OnDidFinishSpeaking() {
+  mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
+  // no longer needed
+  [mSpeechSynthesizer setDelegate:nil];
+  mTask = nullptr;
+}
+
+namespace mozilla {
+namespace dom {
+
+struct OSXVoice {
+  OSXVoice() : mIsDefault(false) {}
+
+  nsString mUri;
+  nsString mName;
+  nsString mLocale;
+  bool mIsDefault;
+};
+
+class RegisterVoicesRunnable final : public Runnable {
+ public:
+  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService, nsTArray<OSXVoice>& aList)
+      : Runnable("RegisterVoicesRunnable"), mSpeechService(aSpeechService), mVoices(aList) {}
+
+  NS_IMETHOD Run() override;
+
+ private:
+  ~RegisterVoicesRunnable() override = default;
+
+  // This runnable always use sync mode.  It is unnecesarry to reference object
+  OSXSpeechSynthesizerService* mSpeechService;
+  nsTArray<OSXVoice>& mVoices;
+};
+
+NS_IMETHODIMP
+RegisterVoicesRunnable::Run() {
+  nsresult rv;
+  nsCOMPtr<nsISynthVoiceRegistry> registry = do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
+  if (!registry) {
+    return rv;
+  }
+
+  for (OSXVoice voice : mVoices) {
+    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName, voice.mLocale, true, false);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      continue;
+    }
+
+    if (voice.mIsDefault) {
+      registry->SetDefaultVoice(voice.mUri, true);
+    }
+  }
+
+  registry->NotifyVoicesChanged();
+
+  return NS_OK;
+}
+
+class EnumVoicesRunnable final : public Runnable {
+ public:
+  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
+      : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}
+
+  NS_IMETHOD Run() override;
+
+ private:
+  ~EnumVoicesRunnable() override = default;
+
+  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
+};
+
+NS_IMETHODIMP
+EnumVoicesRunnable::Run() {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  AutoTArray<OSXVoice, 64> list;
+
+  NSArray* voices = [NSSpeechSynthesizer availableVoices];
+  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
+
+  for (NSString* voice in voices) {
+    OSXVoice item;
+
+    NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];
+
+    nsAutoString identifier;
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier], identifier);
+
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName], item.mName);
+
+    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale);
+    item.mLocale.ReplaceChar('_', '-');
+
+    item.mUri.AssignLiteral("urn:moz-tts:osx:");
+    item.mUri.Append(identifier);
+
+    if ([voice isEqualToString:defaultVoice]) {
+      item.mIsDefault = true;
+    }
+
+    list.AppendElement(item);
+  }
+
+  RefPtr<RegisterVoicesRunnable> runnable = new RegisterVoicesRunnable(mSpeechService, list);
+  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
+                                           GetMainThreadSerialEventTarget(), runnable.forget());
+
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton;
+
+NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
+NS_IMPL_RELEASE(OSXSpeechSynthesizerService)
+
+OSXSpeechSynthesizerService::OSXSpeechSynthesizerService() : mInitialized(false) {}
+
+bool OSXSpeechSynthesizerService::Init() {
+  if (Preferences::GetBool("media.webspeech.synth.test") ||
+      !StaticPrefs::media_webspeech_synth_enabled()) {
+    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
+    return false;
+  }
+
+  nsCOMPtr<nsIThread> thread;
+  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
+    return false;
+  }
+
+  // Get all the voices and register in the SynthVoiceRegistry
+  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
+  thread->Dispatch(runnable, NS_DISPATCH_NORMAL);
+
+  mInitialized = true;
+  return true;
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume,
+                                   float aRate, float aPitch, nsISpeechTask* aTask) {
+  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
+
+  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
+             "OSXSpeechSynthesizerService doesn't allow this voice URI");
+
+  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
+  // strlen("urn:moz-tts:osx:") == 16
+  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
+  [synth setVoice:identifier];
+
+  // default rate is 180-220
+  [synth setObject:[NSNumber numberWithInt:aRate * 200] forProperty:NSSpeechRateProperty error:nil];
+  // volume allows 0.0-1.0
+  [synth setObject:[NSNumber numberWithFloat:aVolume] forProperty:NSSpeechVolumeProperty error:nil];
+  // Use default pitch value to calculate this
+  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty error:nil];
+  if (defaultPitch) {
+    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
+    [synth setObject:[NSNumber numberWithInt:newPitch]
+         forProperty:NSSpeechPitchBaseProperty
+               error:nil];
+  }
+
+  nsAutoString escapedText;
+  // We need to map the the offsets from the given text to the escaped text.
+  // The index of the offsets array is the position in the escaped text,
+  // the element value is the position in the user-supplied text.
+  nsTArray<size_t> offsets;
+  offsets.SetCapacity(aText.Length());
+
+  // This loop looks for occurances of "[[" or "]]", escapes them, and
+  // populates the offsets array to supply a map to the original offsets.
+  for (size_t i = 0; i < aText.Length(); i++) {
+    if (aText.Length() > i + 1 &&
+        ((aText[i] == ']' && aText[i + 1] == ']') || (aText[i] == '[' && aText[i + 1] == '['))) {
+      escapedText.AppendLiteral(DLIM_ESCAPE_START);
+      offsets.AppendElements(strlen(DLIM_ESCAPE_START));
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+      escapedText.Append(aText[++i]);
+      offsets.AppendElement(i);
+      escapedText.AppendLiteral(DLIM_ESCAPE_END);
+      offsets.AppendElements(strlen(DLIM_ESCAPE_END));
+    } else {
+      escapedText.Append(aText[i]);
+      offsets.AppendElement(i);
+    }
+  }
+
+  RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
+  nsresult rv = aTask->Setup(callback);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  NSString* text = nsCocoaUtils::ToNSString(escapedText);
+  BOOL success = [synth startSpeakingString:text];
+  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
+
+  aTask->DispatchStart();
+  return NS_OK;
+
+  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
+}
+
+NS_IMETHODIMP
+OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
+                                     const char16_t* aData) {
+  return NS_OK;
+}
+
+OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
+  MOZ_ASSERT(NS_IsMainThread());
+  if (XRE_GetProcessType() != GeckoProcessType_Default) {
+    return nullptr;
+  }
+
+  if (!sSingleton) {
+    RefPtr<OSXSpeechSynthesizerService> speechService = new OSXSpeechSynthesizerService();
+    if (speechService->Init()) {
+      sSingleton = speechService;
+      ClearOnShutdown(&sSingleton);
+    }
+  }
+  return sSingleton;
+}
+
+already_AddRefed<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::GetInstanceForService() {
+  RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance();
+  return speechService.forget();
+}
+
+}  // namespace dom
+}  // namespace mozilla