/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset:
 * 2 -*- */
/* vim: set ts=2 sw=2 et tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "nsISupports.h"
|
|
#include "nsServiceManagerUtils.h"
|
|
#include "nsObjCExceptions.h"
|
|
#include "nsCocoaUtils.h"
|
|
#include "nsIThread.h"
|
|
#include "nsThreadUtils.h"
|
|
#include "nsXULAppAPI.h"
|
|
#include "mozilla/ClearOnShutdown.h"
|
|
#include "mozilla/dom/nsSynthVoiceRegistry.h"
|
|
#include "mozilla/dom/nsSpeechTask.h"
|
|
#include "mozilla/Preferences.h"
|
|
#include "mozilla/StaticPrefs_media.h"
|
|
#include "mozilla/Assertions.h"
|
|
#include "OSXSpeechSynthesizerService.h"
|
|
|
|
#import <Cocoa/Cocoa.h>
|
|
|
|
@class SpeechDelegate;
|
|
|
|
// The synthesizer's default delimiters are "[[" and "]]". To speak text that
// contains them literally, the delimiters are switched to "((" / "))" right
// before the sequence (DLIM_ESCAPE_START) and switched back to the defaults
// right after it (DLIM_ESCAPE_END).
#define DLIM_ESCAPE_START "[[dlim (( ))]]"
#define DLIM_ESCAPE_END "((dlim [[ ]]))"
|
|
|
|
using namespace mozilla;
|
|
|
|
// Connects one NSSpeechSynthesizer to the nsISpeechTask it is speaking for.
// Control requests from the task (cancel/pause/resume/volume) are applied to
// the synthesizer, and synthesizer events received through SpeechDelegate are
// reported back to the task.
class SpeechTaskCallback final : public nsISpeechTaskCallback {
 public:
  // aSynth is released in the destructor; aOffsets is cloned.
  SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
                     const nsTArray<size_t>& aOffsets);

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback,
                                           nsISpeechTaskCallback)

  NS_DECL_NSISPEECHTASKCALLBACK

  // Synthesizer events, invoked by SpeechDelegate.
  void OnDidFinishSpeaking();
  void OnError(uint32_t aIndex);
  void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);

 private:
  virtual ~SpeechTaskCallback();

  // Seconds elapsed since this object was constructed.
  float GetTimeDurationFromStart();

  // Task to notify; reset to null once speaking has finished.
  nsCOMPtr<nsISpeechTask> mTask;
  // Synthesizer handed over by the creator; released in the destructor.
  NSSpeechSynthesizer* mSpeechSynthesizer;
  // Delegate created in the constructor and released in the destructor.
  SpeechDelegate* mDelegate;
  // Time of construction, the origin for reported elapsed times.
  TimeStamp mStartingTime;
  // Offset into the original text of the most recently reported word.
  uint32_t mCurrentIndex;
  // Lookup table from a synthesizer-reported index to an offset in the
  // original text.
  nsTArray<size_t> mOffsets;
};
|
|
|
|
// NSSpeechSynthesizerDelegate that relays synthesizer callbacks to a C++
// SpeechTaskCallback.
@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  // Raw, non-owning pointer; it is neither retained nor AddRef'ed here.
  SpeechTaskCallback* mCallback;
}

// Stores aCallback as the receiver of all forwarded delegate events.
- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
@end
|
|
|
|
@implementation SpeechDelegate
// Stores the callback that receives every forwarded delegate event.
- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
  // -init may return nil or a different object than the receiver, so its
  // result has to be assigned to self before any ivar is touched.
  if ((self = [super init])) {
    mCallback = aCallback;
  }
  return self;
}

// Forwards the location and length of the word about to be spoken.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
            willSpeakWord:(NSRange)aRange
                 ofString:(NSString*)aString {
  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
}

// Forwards the end of speech; aFinishedSpeaking is not inspected.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
        didFinishSpeaking:(BOOL)aFinishedSpeaking {
  mCallback->OnDidFinishSpeaking();
}

// Forwards the character index of a synthesis error; the message text is not
// passed on.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
    didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
                    ofString:(NSString*)aString
                     message:(NSString*)aMessage {
  mCallback->OnError(aCharacterIndex);
}
@end
|
|
|
|
// mTask is the only member that takes part in cycle collection.
NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);

// Cycle-collecting reference counting.
NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)

// QueryInterface: nsISpeechTaskCallback, with nsISupports resolved through it.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END
|
|
|
|
// Takes over aSynth (released in the destructor), clones aOffsets, installs a
// freshly created SpeechDelegate on the synthesizer and records the start
// time used for all elapsed-time reports.
SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask,
                                       NSSpeechSynthesizer* aSynth,
                                       const nsTArray<size_t>& aOffsets)
    : mTask(aTask),
      mSpeechSynthesizer(aSynth),
      mCurrentIndex(0),
      mOffsets(aOffsets.Clone()) {
  // The delegate only stores a raw back-pointer to this object.
  mDelegate = [[SpeechDelegate alloc] initWithCallback:this];
  mSpeechSynthesizer.delegate = mDelegate;

  mStartingTime = TimeStamp::Now();
}
|
|
|
|
// Detaches the delegate from the synthesizer, then drops the references this
// object holds on both Objective-C objects.
SpeechTaskCallback::~SpeechTaskCallback() {
  // Detach first: the delegate holds a raw pointer to this object.
  mSpeechSynthesizer.delegate = nil;

  [mDelegate release];
  [mSpeechSynthesizer release];
}
|
|
|
|
// nsISpeechTaskCallback: stops the synthesizer. Returns NS_OK, or
// NS_ERROR_FAILURE if the Objective-C try block is left abnormally.
NS_IMETHODIMP
SpeechTaskCallback::OnCancel() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer stopSpeaking];

  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
|
|
|
|
// nsISpeechTaskCallback: pauses the synthesizer immediately and, if the task
// is still attached, reports the pause with the elapsed time and the current
// text offset. Returns NS_ERROR_FAILURE when no task is attached.
NS_IMETHODIMP
SpeechTaskCallback::OnPause() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];

  if (mTask) {
    mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
    return NS_OK;
  }

  // When pause() is called in the child process, it may not have received
  // the end event from the chrome process yet.
  return NS_ERROR_FAILURE;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
|
|
|
|
// nsISpeechTaskCallback: resumes the synthesizer and, if the task is still
// attached, reports the resume with the elapsed time and the current text
// offset. Returns NS_ERROR_FAILURE when no task is attached.
NS_IMETHODIMP
SpeechTaskCallback::OnResume() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer continueSpeaking];

  if (mTask) {
    mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
    return NS_OK;
  }

  // When resume() is called in the child process, it may not have received
  // the end event from the chrome process yet.
  return NS_ERROR_FAILURE;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
|
|
|
|
// nsISpeechTaskCallback: applies aVolume to the synthesizer's volume
// property. Any error reported by the property setter is ignored.
NS_IMETHODIMP
SpeechTaskCallback::OnVolumeChanged(float aVolume) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  NSNumber* volume = [NSNumber numberWithFloat:aVolume];
  [mSpeechSynthesizer setObject:volume
                    forProperty:NSSpeechVolumeProperty
                          error:nil];

  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
|
|
|
|
// Returns the number of seconds elapsed since mStartingTime was recorded in
// the constructor.
float SpeechTaskCallback::GetTimeDurationFromStart() {
  return (TimeStamp::Now() - mStartingTime).ToSeconds();
}
|
|
|
|
// Called before a word is spoken. aIndex is the word's position as reported
// by the synthesizer; it is translated through mOffsets into an offset in the
// original text and reported to the task as a "word" boundary.
void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
  // An index outside the table leaves the previous offset in place.
  if (aIndex < mOffsets.Length()) {
    mCurrentIndex = mOffsets[aIndex];
  }

  if (mTask) {
    mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(),
                            mCurrentIndex, aLength, 1);
  }
}
|
|
|
|
// Reports a synthesis error at character index aIndex to the task, if one is
// still attached.
void SpeechTaskCallback::OnError(uint32_t aIndex) {
  if (mTask) {
    mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
  }
}
|
|
|
|
// Called when the synthesizer has finished speaking. Reports the end event to
// the task, then detaches the delegate and drops the task reference.
void SpeechTaskCallback::OnDidFinishSpeaking() {
  // mTask is reset below and is also a cycle-collected member, so guard the
  // dereference the same way the other event handlers do.
  if (mTask) {
    mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
  }

  // no longer needed
  [mSpeechSynthesizer setDelegate:nil];
  mTask = nullptr;
}
|
|
|
|
namespace mozilla {
|
|
namespace dom {
|
|
|
|
// Plain record describing one system voice, collected on the worker thread
// and handed to the voice registry.
struct OSXVoice {
  // "urn:moz-tts:osx:" followed by the system voice identifier.
  nsString mUri;
  // Voice name as reported by the system.
  nsString mName;
  // Locale identifier with '_' replaced by '-'.
  nsString mLocale;
  // True for the voice the system reports as its default.
  bool mIsDefault = false;
};
|
|
|
|
// Runnable that registers a list of enumerated voices with the synth voice
// registry on behalf of aSpeechService.
class RegisterVoicesRunnable final : public Runnable {
 public:
  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
                         nsTArray<OSXVoice>& aList)
      : Runnable("RegisterVoicesRunnable"),
        mSpeechService(aSpeechService),
        mVoices(aList) {}

  NS_IMETHOD Run() override;

 private:
  ~RegisterVoicesRunnable() override = default;

  // This runnable is always run synchronously: the dispatcher waits for it
  // to complete, so the service and the voice list outlive it and no owning
  // references are needed.
  OSXSpeechSynthesizerService* mSpeechService;
  nsTArray<OSXVoice>& mVoices;
};
|
|
|
|
// Adds every collected voice to the synth voice registry, marks the default
// voice, and notifies the registry that the voice list changed. Returns the
// service lookup error if the registry is unavailable, NS_OK otherwise; a
// voice that fails to register is skipped with a warning.
NS_IMETHODIMP
RegisterVoicesRunnable::Run() {
  nsresult rv;
  nsCOMPtr<nsISynthVoiceRegistry> registry =
      do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
  if (!registry) {
    return rv;
  }

  // Iterate by const reference: copying an OSXVoice would copy three strings
  // per voice for no benefit.
  for (const OSXVoice& voice : mVoices) {
    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName,
                            voice.mLocale, true, false);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      continue;
    }

    if (voice.mIsDefault) {
      registry->SetDefaultVoice(voice.mUri, true);
    }
  }

  registry->NotifyVoicesChanged();

  return NS_OK;
}
|
|
|
|
// Runnable that enumerates the system voices and forwards them to the main
// thread for registration. It holds a strong reference to the service for
// its whole lifetime.
class EnumVoicesRunnable final : public Runnable {
 public:
  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
      : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}

  NS_IMETHOD Run() override;

 private:
  ~EnumVoicesRunnable() override = default;

  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
};
|
|
|
|
// Builds an OSXVoice entry for every voice NSSpeechSynthesizer reports, then
// dispatches a RegisterVoicesRunnable to the main thread and waits until it
// has completed. Returns NS_OK, or NS_ERROR_FAILURE if the Objective-C try
// block is left abnormally.
NS_IMETHODIMP
EnumVoicesRunnable::Run() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  AutoTArray<OSXVoice, 64> list;

  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];

  for (NSString* voice in [NSSpeechSynthesizer availableVoices]) {
    NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];

    OSXVoice item;
    item.mIsDefault = [voice isEqualToString:defaultVoice];

    nsCocoaUtils::GetStringForNSString(attr[NSVoiceName], item.mName);

    // The system locale identifier uses '_' as separator; store it with '-'.
    nsCocoaUtils::GetStringForNSString(attr[NSVoiceLocaleIdentifier],
                                       item.mLocale);
    item.mLocale.ReplaceChar('_', '-');

    // The voice URI is the fixed prefix followed by the system identifier.
    nsAutoString identifier;
    nsCocoaUtils::GetStringForNSString(attr[NSVoiceIdentifier], identifier);
    item.mUri.AssignLiteral("urn:moz-tts:osx:");
    item.mUri.Append(identifier);

    list.AppendElement(item);
  }

  // `list` is only borrowed by the runnable, so wait for it to complete
  // before returning.
  RefPtr<RegisterVoicesRunnable> runnable =
      new RegisterVoicesRunnable(mSpeechService, list);
  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
                                           GetMainThreadSerialEventTarget(),
                                           runnable.forget());

  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
|
|
|
|
// Process-wide service instance; assigned in GetInstance() and registered
// there with ClearOnShutdown.
StaticRefPtr<OSXSpeechSynthesizerService>
    OSXSpeechSynthesizerService::sSingleton;
|
|
|
|
// Reference counting.
NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
NS_IMPL_RELEASE(OSXSpeechSynthesizerService)

// QueryInterface: nsISpeechService and nsIObserver, with nsISupports resolved
// through nsISpeechService.
NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
  NS_INTERFACE_MAP_ENTRY(nsIObserver)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
NS_INTERFACE_MAP_END
|
|
|
|
// Starts out uninitialized; Init() sets mInitialized on success.
OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
    : mInitialized(false) {
}
|
|
|
|
// Starts asynchronous voice enumeration on a dedicated "SpeechWorker" thread.
// Returns false, leaving the service uninitialized, when the test pref is
// set, speech synthesis is disabled, the worker thread cannot be created, or
// the enumeration runnable cannot be queued. Returns true otherwise.
bool OSXSpeechSynthesizerService::Init() {
  if (Preferences::GetBool("media.webspeech.synth.test") ||
      !StaticPrefs::media_webspeech_synth_enabled()) {
    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
    return false;
  }

  nsCOMPtr<nsIThread> thread;
  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
    return false;
  }

  // Get all the voices and register in the SynthVoiceRegistry
  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
  // If the runnable is never queued, no voice will ever be registered, so do
  // not report the service as initialized.
  if (NS_WARN_IF(NS_FAILED(thread->Dispatch(runnable, NS_DISPATCH_NORMAL)))) {
    return false;
  }

  mInitialized = true;
  return true;
}
|
|
|
|
// Speaks aText with the voice named by aUri ("urn:moz-tts:osx:" followed by
// the system voice identifier).
//
// aVolume, aRate and aPitch are applied to a new NSSpeechSynthesizer. Every
// "[[" or "]]" in aText is escaped so that it is spoken literally, and a
// SpeechTaskCallback is attached to aTask to relay synthesizer events.
//
// Returns the failure code of aTask->Setup() if that fails, NS_ERROR_FAILURE
// if the synthesizer refuses to start or the Objective-C try block is left
// abnormally, and NS_OK otherwise.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Speak(const nsAString& aText,
                                   const nsAString& aUri, float aVolume,
                                   float aRate, float aPitch,
                                   nsISpeechTask* aTask) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
             "OSXSpeechSynthesizerService doesn't allow this voice URI");

  // This reference is handed to the SpeechTaskCallback created below, whose
  // destructor releases it.
  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
  // strlen("urn:moz-tts:osx:") == 16
  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
  [synth setVoice:identifier];

  // default rate is 180-220
  [synth setObject:[NSNumber numberWithInt:aRate * 200]
       forProperty:NSSpeechRateProperty
             error:nil];
  // volume allows 0.0-1.0
  [synth setObject:[NSNumber numberWithFloat:aVolume]
       forProperty:NSSpeechVolumeProperty
             error:nil];
  // Use default pitch value to calculate this
  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty
                                              error:nil];
  if (defaultPitch) {
    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
    [synth setObject:[NSNumber numberWithInt:newPitch]
         forProperty:NSSpeechPitchBaseProperty
               error:nil];
  }

  nsAutoString escapedText;
  // We need to map the offsets from the given text to the escaped text.
  // The index of the offsets array is the position in the escaped text,
  // the element value is the position in the user-supplied text.
  nsTArray<size_t> offsets;
  offsets.SetCapacity(aText.Length());

  // Appends aCount entries that all map to aOffset. Used for the inserted
  // delimiter sequences so that every slot of the table holds a meaningful
  // offset instead of whatever a count-only append leaves behind.
  auto appendRepeatedOffset = [&offsets](size_t aCount, size_t aOffset) {
    for (size_t n = 0; n < aCount; n++) {
      offsets.AppendElement(aOffset);
    }
  };

  // This loop looks for occurrences of "[[" or "]]", escapes them, and
  // populates the offsets array to supply a map to the original offsets.
  for (size_t i = 0; i < aText.Length(); i++) {
    if (aText.Length() > i + 1 && ((aText[i] == ']' && aText[i + 1] == ']') ||
                                   (aText[i] == '[' && aText[i + 1] == '['))) {
      // The opening escape maps to the first bracket of the pair.
      escapedText.AppendLiteral(DLIM_ESCAPE_START);
      appendRepeatedOffset(strlen(DLIM_ESCAPE_START), i);
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
      escapedText.Append(aText[++i]);
      offsets.AppendElement(i);
      // The closing escape maps to the second bracket of the pair.
      escapedText.AppendLiteral(DLIM_ESCAPE_END);
      appendRepeatedOffset(strlen(DLIM_ESCAPE_END), i);
    } else {
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
    }
  }

  RefPtr<SpeechTaskCallback> callback =
      new SpeechTaskCallback(aTask, synth, offsets);
  nsresult rv = aTask->Setup(callback);
  NS_ENSURE_SUCCESS(rv, rv);

  NSString* text = nsCocoaUtils::ToNSString(escapedText);
  BOOL success = [synth startSpeakingString:text];
  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);

  aTask->DispatchStart();
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
|
|
|
|
// nsIObserver: every notification is accepted and ignored.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                     const char16_t* aData) {
  return NS_OK;
}
|
|
|
|
// Returns the singleton, creating and initializing it on first use. Main
// thread only. Returns nullptr outside the default process type, and also
// when Init() fails (a later call then tries again).
OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
  MOZ_ASSERT(NS_IsMainThread());
  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    return nullptr;
  }

  if (sSingleton) {
    return sSingleton;
  }

  RefPtr<OSXSpeechSynthesizerService> service =
      new OSXSpeechSynthesizerService();
  if (service->Init()) {
    sSingleton = service;
    ClearOnShutdown(&sSingleton);
  }
  // Still null here if Init() failed.
  return sSingleton;
}
|
|
|
|
// Same as GetInstance(), but hands the caller its own reference (possibly
// null).
already_AddRefed<OSXSpeechSynthesizerService>
OSXSpeechSynthesizerService::GetInstanceForService() {
  RefPtr<OSXSpeechSynthesizerService> instance(GetInstance());
  return instance.forget();
}
|
|
|
|
} // namespace dom
|
|
} // namespace mozilla
|