diff options
Diffstat (limited to 'dom/media/webspeech')
118 files changed, 12360 insertions, 0 deletions
diff --git a/dom/media/webspeech/moz.build b/dom/media/webspeech/moz.build new file mode 100644 index 0000000000..26856a0598 --- /dev/null +++ b/dom/media/webspeech/moz.build @@ -0,0 +1,12 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +DIRS = ["synth"] + +if CONFIG["MOZ_WEBSPEECH"]: + DIRS += ["recognition"] + +with Files("**"): + BUG_COMPONENT = ("Core", "Web Speech") diff --git a/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.cpp b/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.cpp new file mode 100644 index 0000000000..e68ccc417e --- /dev/null +++ b/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.cpp @@ -0,0 +1,462 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsThreadUtils.h" +#include "nsXPCOMCIDInternal.h" +#include "OnlineSpeechRecognitionService.h" +#include "nsIFile.h" +#include "SpeechGrammar.h" +#include "SpeechRecognition.h" +#include "SpeechRecognitionAlternative.h" +#include "SpeechRecognitionResult.h" +#include "SpeechRecognitionResultList.h" +#include "nsIObserverService.h" +#include "mozilla/dom/Document.h" +#include "mozilla/Preferences.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/Services.h" +#include "nsDirectoryServiceDefs.h" +#include "nsDirectoryServiceUtils.h" +#include "nsNetUtil.h" +#include "nsContentUtils.h" +#include "nsIChannel.h" +#include "nsIHttpChannel.h" +#include "nsIPrincipal.h" +#include "nsIStreamListener.h" +#include "nsIUploadChannel2.h" +#include "mozilla/dom/ClientIPCTypes.h" +#include "nsStringStream.h" +#include "nsIOutputStream.h" +#include "nsStreamUtils.h" +#include "OpusTrackEncoder.h" +#include "OggWriter.h" +#include "nsIClassOfService.h" +#include <json/json.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +namespace mozilla { + +using namespace dom; + +#define PREFERENCE_DEFAULT_RECOGNITION_ENDPOINT \ + "media.webspeech.service.endpoint" +#define DEFAULT_RECOGNITION_ENDPOINT "https://speaktome-2.services.mozilla.com/" +#define MAX_LISTENING_TIME_MS 10000 + +NS_IMPL_ISUPPORTS(OnlineSpeechRecognitionService, nsISpeechRecognitionService, + nsIStreamListener) + +NS_IMETHODIMP +OnlineSpeechRecognitionService::OnStartRequest(nsIRequest* aRequest) { + MOZ_ASSERT(NS_IsMainThread()); + return NS_OK; +} + +static nsresult AssignResponseToBuffer(nsIInputStream* aIn, void* aClosure, + const char* aFromRawSegment, + uint32_t aToOffset, uint32_t aCount, + uint32_t* aWriteCount) { + nsCString* buf = static_cast<nsCString*>(aClosure); + buf->Append(aFromRawSegment, aCount); + *aWriteCount = aCount; + return NS_OK; +} + +NS_IMETHODIMP +OnlineSpeechRecognitionService::OnDataAvailable(nsIRequest* aRequest, + 
nsIInputStream* aInputStream, + uint64_t aOffset, + uint32_t aCount) { + MOZ_ASSERT(NS_IsMainThread()); + nsresult rv; + uint32_t readCount; + rv = aInputStream->ReadSegments(AssignResponseToBuffer, &mBuf, aCount, + &readCount); + NS_ENSURE_SUCCESS(rv, rv); + return NS_OK; +} + +NS_IMETHODIMP +OnlineSpeechRecognitionService::OnStopRequest(nsIRequest* aRequest, + nsresult aStatusCode) { + MOZ_ASSERT(NS_IsMainThread()); + + auto clearBuf = MakeScopeExit([&] { mBuf.Truncate(); }); + + if (mAborted) { + return NS_OK; + } + + bool success; + float confidence = 0; + Json::Value root; + Json::CharReaderBuilder builder; + bool parsingSuccessful; + nsAutoCString result; + nsAutoCString hypoValue; + nsAutoCString errorMsg; + SpeechRecognitionErrorCode errorCode; + + SR_LOG("STT Result: %s", mBuf.get()); + + if (NS_FAILED(aStatusCode)) { + success = false; + errorMsg.AssignLiteral("Error connecting to the service."); + errorCode = SpeechRecognitionErrorCode::Network; + } else { + success = true; + UniquePtr<Json::CharReader> const reader(builder.newCharReader()); + parsingSuccessful = + reader->parse(mBuf.BeginReading(), mBuf.EndReading(), &root, nullptr); + if (!parsingSuccessful) { + // there's an internal server error + success = false; + errorMsg.AssignLiteral("Internal server error"); + errorCode = SpeechRecognitionErrorCode::Network; + } else { + result.Assign(root.get("status", "error").asString().c_str()); + if (result.EqualsLiteral("ok")) { + // ok, we have a result + if (!root["data"].empty()) { + hypoValue.Assign(root["data"][0].get("text", "").asString().c_str()); + confidence = root["data"][0].get("confidence", "0").asFloat(); + } else { + success = false; + errorMsg.AssignLiteral("Error reading result data."); + errorCode = SpeechRecognitionErrorCode::Network; + } + } else { + success = false; + errorMsg.Assign(root.get("message", "").asString().c_str()); + errorCode = SpeechRecognitionErrorCode::No_speech; + } + } + } + + if (!success) { + 
mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, errorCode, errorMsg); + } else { + // Declare javascript result events + RefPtr<SpeechEvent> event = new SpeechEvent( + mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT); + SpeechRecognitionResultList* resultList = + new SpeechRecognitionResultList(mRecognition); + SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition); + + if (mRecognition->MaxAlternatives() > 0) { + SpeechRecognitionAlternative* alternative = + new SpeechRecognitionAlternative(mRecognition); + + alternative->mTranscript = NS_ConvertUTF8toUTF16(hypoValue); + alternative->mConfidence = confidence; + + result->mItems.AppendElement(alternative); + } + resultList->mItems.AppendElement(result); + + event->mRecognitionResultList = resultList; + NS_DispatchToMainThread(event); + } + + return NS_OK; +} + +OnlineSpeechRecognitionService::OnlineSpeechRecognitionService() = default; +OnlineSpeechRecognitionService::~OnlineSpeechRecognitionService() = default; + +NS_IMETHODIMP +OnlineSpeechRecognitionService::Initialize( + WeakPtr<SpeechRecognition> aSpeechRecognition) { + MOZ_ASSERT(NS_IsMainThread()); + mWriter = MakeUnique<OggWriter>(); + mRecognition = new nsMainThreadPtrHolder<SpeechRecognition>( + "OnlineSpeechRecognitionService::mRecognition", aSpeechRecognition); + mEncodeTaskQueue = mRecognition->GetTaskQueueForEncoding(); + MOZ_ASSERT(mEncodeTaskQueue); + return NS_OK; +} + +void OnlineSpeechRecognitionService::EncoderFinished() { + MOZ_ASSERT(!NS_IsMainThread()); + MOZ_ASSERT(mEncodedAudioQueue.IsFinished()); + + while (RefPtr<EncodedFrame> frame = mEncodedAudioQueue.PopFront()) { + AutoTArray<RefPtr<EncodedFrame>, 1> frames({frame}); + DebugOnly<nsresult> rv = + mWriter->WriteEncodedTrack(frames, mEncodedAudioQueue.AtEndOfStream() + ? 
ContainerWriter::END_OF_STREAM + : 0); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + } + + mWriter->GetContainerData(&mEncodedData, ContainerWriter::FLUSH_NEEDED); + MOZ_ASSERT(mWriter->IsWritingComplete()); + + NS_DispatchToMainThread( + NewRunnableMethod("OnlineSpeechRecognitionService::DoSTT", this, + &OnlineSpeechRecognitionService::DoSTT)); +} + +void OnlineSpeechRecognitionService::EncoderInitialized() { + MOZ_ASSERT(!NS_IsMainThread()); + AutoTArray<RefPtr<TrackMetadataBase>, 1> metadata; + metadata.AppendElement(mAudioEncoder->GetMetadata()); + if (metadata[0]->GetKind() != TrackMetadataBase::METADATA_OPUS) { + SR_LOG("wrong meta data type!"); + MOZ_ASSERT_UNREACHABLE(); + } + + nsresult rv = mWriter->SetMetadata(metadata); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + + rv = mWriter->GetContainerData(&mEncodedData, ContainerWriter::GET_HEADER); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + + Unused << rv; +} + +void OnlineSpeechRecognitionService::EncoderError() { + MOZ_ASSERT(!NS_IsMainThread()); + SR_LOG("Error encoding frames."); + mEncodedData.Clear(); + NS_DispatchToMainThread(NS_NewRunnableFunction( + "SpeechRecognition::DispatchError", + [this, self = RefPtr<OnlineSpeechRecognitionService>(this)]() { + if (!mRecognition) { + return; + } + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Audio_capture, "Encoder error"); + })); +} + +NS_IMETHODIMP +OnlineSpeechRecognitionService::ProcessAudioSegment(AudioSegment* aAudioSegment, + int32_t aSampleRate) { + MOZ_ASSERT(!NS_IsMainThread()); + int64_t duration = aAudioSegment->GetDuration(); + if (duration <= 0) { + return NS_OK; + } + + if (!mAudioEncoder) { + mSpeechEncoderListener = new SpeechEncoderListener(this); + mAudioEncoder = + MakeUnique<OpusTrackEncoder>(aSampleRate, mEncodedAudioQueue); + RefPtr<AbstractThread> mEncoderThread = AbstractThread::GetCurrent(); + mAudioEncoder->SetWorkerThread(mEncoderThread); + 
mAudioEncoder->RegisterListener(mSpeechEncoderListener); + } + + mAudioEncoder->AppendAudioSegment(std::move(*aAudioSegment)); + + TimeStamp now = TimeStamp::Now(); + if (mFirstIteration.IsNull()) { + mFirstIteration = now; + } + + if ((now - mFirstIteration).ToMilliseconds() >= MAX_LISTENING_TIME_MS) { + NS_DispatchToMainThread(NS_NewRunnableFunction( + "SpeechRecognition::Stop", + [this, self = RefPtr<OnlineSpeechRecognitionService>(this)]() { + if (!mRecognition) { + return; + } + mRecognition->Stop(); + })); + + return NS_OK; + } + + return NS_OK; +} + +void OnlineSpeechRecognitionService::DoSTT() { + MOZ_ASSERT(NS_IsMainThread()); + + if (mAborted) { + return; + } + + nsresult rv; + nsCOMPtr<nsIChannel> chan; + nsCOMPtr<nsIURI> uri; + nsAutoCString speechRecognitionEndpoint; + nsAutoCString prefEndpoint; + nsAutoString language; + + Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_ENDPOINT, + prefEndpoint); + + if (!prefEndpoint.IsEmpty()) { + speechRecognitionEndpoint = prefEndpoint; + } else { + speechRecognitionEndpoint = DEFAULT_RECOGNITION_ENDPOINT; + } + + rv = NS_NewURI(getter_AddRefs(uri), speechRecognitionEndpoint, nullptr, + nullptr); + if (NS_WARN_IF(NS_FAILED(rv))) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Network, "Unknown URI"); + return; + } + + nsSecurityFlags secFlags = nsILoadInfo::SEC_REQUIRE_CORS_INHERITS_SEC_CONTEXT; + nsLoadFlags loadFlags = + nsIRequest::LOAD_NORMAL | nsIChannel::LOAD_BYPASS_SERVICE_WORKER; + nsContentPolicyType contentPolicy = nsIContentPolicy::TYPE_OTHER; + + nsPIDOMWindowInner* window = mRecognition->GetOwner(); + if (NS_WARN_IF(!window)) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Aborted, "No window"); + return; + } + + Document* doc = window->GetExtantDoc(); + if (NS_WARN_IF(!doc)) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, 
+ SpeechRecognitionErrorCode::Aborted, "No document"); + } + rv = NS_NewChannel(getter_AddRefs(chan), uri, doc->NodePrincipal(), secFlags, + contentPolicy, nullptr, nullptr, nullptr, nullptr, + loadFlags); + if (NS_WARN_IF(NS_FAILED(rv))) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Network, "Failed to open channel"); + return; + } + + nsCOMPtr<nsIHttpChannel> httpChan = do_QueryInterface(chan); + if (httpChan) { + rv = httpChan->SetRequestMethod("POST"_ns); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + } + + if (httpChan) { + mRecognition->GetLang(language); + // Accept-Language-STT is a custom header of our backend server used to set + // the language of the speech sample being submitted by the client + rv = httpChan->SetRequestHeader("Accept-Language-STT"_ns, + NS_ConvertUTF16toUTF8(language), false); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + // Tell the server to not store the transcription by default + rv = httpChan->SetRequestHeader("Store-Transcription"_ns, "0"_ns, false); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + // Tell the server to not store the sample by default + rv = httpChan->SetRequestHeader("Store-Sample"_ns, "0"_ns, false); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + // Set the product tag as teh web speech api + rv = httpChan->SetRequestHeader("Product-Tag"_ns, "wsa"_ns, false); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + } + + nsCOMPtr<nsIClassOfService> cos(do_QueryInterface(chan)); + if (cos) { + cos->AddClassFlags(nsIClassOfService::UrgentStart); + } + + nsCOMPtr<nsIUploadChannel2> uploadChan = do_QueryInterface(chan); + if (uploadChan) { + nsCOMPtr<nsIInputStream> bodyStream; + uint32_t length = 0; + for (const nsTArray<uint8_t>& chunk : mEncodedData) { + length += chunk.Length(); + } + + nsTArray<uint8_t> audio; + if (!audio.SetCapacity(length, fallible)) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + 
SpeechRecognitionErrorCode::Audio_capture, "Allocation error"); + return; + } + + for (const nsTArray<uint8_t>& chunk : mEncodedData) { + audio.AppendElements(chunk); + } + + mEncodedData.Clear(); + + rv = NS_NewByteInputStream(getter_AddRefs(bodyStream), std::move(audio)); + if (NS_WARN_IF(NS_FAILED(rv))) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Network, "Failed to open stream"); + return; + } + if (bodyStream) { + rv = uploadChan->ExplicitSetUploadStream(bodyStream, "audio/ogg"_ns, + length, "POST"_ns, false); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + } + } + + rv = chan->AsyncOpen(this); + if (NS_WARN_IF(NS_FAILED(rv))) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Network, "Internal server error"); + } +} + +NS_IMETHODIMP +OnlineSpeechRecognitionService::SoundEnd() { + MOZ_ASSERT(NS_IsMainThread()); + + if (!mEncodeTaskQueue) { + // Not initialized + return NS_OK; + } + + nsresult rv = mEncodeTaskQueue->Dispatch(NS_NewRunnableFunction( + "OnlineSpeechRecognitionService::SoundEnd", + [this, self = RefPtr<OnlineSpeechRecognitionService>(this)]() { + if (mAudioEncoder) { + mAudioEncoder->NotifyEndOfStream(); + mAudioEncoder->UnregisterListener(mSpeechEncoderListener); + mSpeechEncoderListener = nullptr; + mAudioEncoder = nullptr; + EncoderFinished(); + } + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + + mEncodeTaskQueue = nullptr; + + return NS_OK; +} + +NS_IMETHODIMP +OnlineSpeechRecognitionService::ValidateAndSetGrammarList( + SpeechGrammar* aSpeechGrammar, + nsISpeechGrammarCompilationCallback* aCallback) { + // This is an online LVCSR (STT) service, + // so we don't need to set a grammar + return NS_OK; +} + +NS_IMETHODIMP +OnlineSpeechRecognitionService::Abort() { + MOZ_ASSERT(NS_IsMainThread()); + if (mAborted) { + return NS_OK; + } + mAborted = true; + return SoundEnd(); +} +} // namespace 
mozilla diff --git a/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h b/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h new file mode 100644 index 0000000000..c049e5046a --- /dev/null +++ b/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h @@ -0,0 +1,132 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_OnlineRecognitionService_h +#define mozilla_dom_OnlineRecognitionService_h + +#include "nsCOMPtr.h" +#include "nsTArray.h" +#include "nsISpeechRecognitionService.h" +#include "speex/speex_resampler.h" +#include "nsIStreamListener.h" +#include "OpusTrackEncoder.h" +#include "ContainerWriter.h" + +#define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \ + {0x0ff5ce56, \ + 0x5b09, \ + 0x4db8, \ + {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}}; + +namespace mozilla { + +namespace ipc { +class PrincipalInfo; +} // namespace ipc + +/** + * Online implementation of the nsISpeechRecognitionService interface + */ +class OnlineSpeechRecognitionService : public nsISpeechRecognitionService, + public nsIStreamListener { + public: + // Add XPCOM glue code + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSISPEECHRECOGNITIONSERVICE + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + + /** + * Listener responsible for handling the events raised by the TrackEncoder + */ + class SpeechEncoderListener : public TrackEncoderListener { + public: + explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService) + : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {} + + void Started(TrackEncoder* aEncoder) override {} + + void Initialized(TrackEncoder* aEncoder) override { + 
MOZ_ASSERT(mOwningThread->IsCurrentThreadIn()); + mService->EncoderInitialized(); + } + + void Error(TrackEncoder* aEncoder) override { + MOZ_ASSERT(mOwningThread->IsCurrentThreadIn()); + mService->EncoderError(); + } + + private: + const RefPtr<OnlineSpeechRecognitionService> mService; + const RefPtr<AbstractThread> mOwningThread; + }; + + /** + * Default constructs a OnlineSpeechRecognitionService + */ + OnlineSpeechRecognitionService(); + + /** + * Called by SpeechEncoderListener when the AudioTrackEncoder has been + * initialized. + */ + void EncoderInitialized(); + + /** + * Called after the AudioTrackEncoder has encoded all data for us to wrap in a + * container and pass along. + */ + void EncoderFinished(); + + /** + * Called by SpeechEncoderListener when the AudioTrackEncoder has + * encountered an error. + */ + void EncoderError(); + + private: + /** + * Private destructor to prevent bypassing of reference counting + */ + virtual ~OnlineSpeechRecognitionService(); + + /** The associated SpeechRecognition */ + nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition; + + /** + * Builds a mock SpeechRecognitionResultList + */ + dom::SpeechRecognitionResultList* BuildMockResultList(); + + /** + * Method responsible for uploading the audio to the remote endpoint + */ + void DoSTT(); + + // Encoded and packaged ogg audio data + nsTArray<nsTArray<uint8_t>> mEncodedData; + // Member responsible for holding a reference to the TrackEncoderListener + RefPtr<SpeechEncoderListener> mSpeechEncoderListener; + // MediaQueue fed encoded data by mAudioEncoder + MediaQueue<EncodedFrame> mEncodedAudioQueue; + // Encoder responsible for encoding the frames from pcm to opus which is the + // format supported by our backend + UniquePtr<AudioTrackEncoder> mAudioEncoder; + // Object responsible for wrapping the opus frames into an ogg container + UniquePtr<ContainerWriter> mWriter; + // Member responsible for storing the json string returned by the endpoint + nsCString mBuf; + 
// Used to calculate a ceiling on the time spent listening. + TimeStamp mFirstIteration; + // flag responsible to control if the user choose to abort + bool mAborted = false; + // reference to the audio encoder queue + RefPtr<TaskQueue> mEncodeTaskQueue; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/recognition/SpeechGrammar.cpp b/dom/media/webspeech/recognition/SpeechGrammar.cpp new file mode 100644 index 0000000000..de6e9fa30f --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechGrammar.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechGrammar.h" + +#include "mozilla/ErrorResult.h" +#include "mozilla/dom/SpeechGrammarBinding.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechGrammar, mParent) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammar) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammar) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammar) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechGrammar::SpeechGrammar(nsISupports* aParent) : mParent(aParent) {} + +SpeechGrammar::~SpeechGrammar() = default; + +already_AddRefed<SpeechGrammar> SpeechGrammar::Constructor( + const GlobalObject& aGlobal) { + RefPtr<SpeechGrammar> speechGrammar = + new SpeechGrammar(aGlobal.GetAsSupports()); + return speechGrammar.forget(); +} + +nsISupports* SpeechGrammar::GetParentObject() const { return mParent; } + +JSObject* SpeechGrammar::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechGrammar_Binding::Wrap(aCx, this, aGivenProto); +} + +void SpeechGrammar::GetSrc(nsString& aRetVal, ErrorResult& aRv) const { + aRetVal = 
mSrc; +} + +void SpeechGrammar::SetSrc(const nsAString& aArg, ErrorResult& aRv) { + mSrc = aArg; +} + +float SpeechGrammar::GetWeight(ErrorResult& aRv) const { + aRv.Throw(NS_ERROR_NOT_IMPLEMENTED); + return 0; +} + +void SpeechGrammar::SetWeight(float aArg, ErrorResult& aRv) { + aRv.Throw(NS_ERROR_NOT_IMPLEMENTED); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/recognition/SpeechGrammar.h b/dom/media/webspeech/recognition/SpeechGrammar.h new file mode 100644 index 0000000000..0dee1e9792 --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechGrammar.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechGrammar_h +#define mozilla_dom_SpeechGrammar_h + +#include "nsCOMPtr.h" +#include "nsCycleCollectionParticipant.h" +#include "nsString.h" +#include "nsWrapperCache.h" +#include "js/TypeDecls.h" + +#include "mozilla/Attributes.h" + +namespace mozilla { +class ErrorResult; + +namespace dom { + +class GlobalObject; + +class SpeechGrammar final : public nsISupports, public nsWrapperCache { + public: + explicit SpeechGrammar(nsISupports* aParent); + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechGrammar) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + static already_AddRefed<SpeechGrammar> Constructor( + const GlobalObject& aGlobal); + + static already_AddRefed<SpeechGrammar> WebkitSpeechGrammar( + const GlobalObject& aGlobal, ErrorResult& aRv) { + return Constructor(aGlobal); + } + + void GetSrc(nsString& aRetVal, ErrorResult& aRv) const; + + void SetSrc(const nsAString& aArg, ErrorResult& aRv); + + 
float GetWeight(ErrorResult& aRv) const; + + void SetWeight(float aArg, ErrorResult& aRv); + + private: + ~SpeechGrammar(); + + nsCOMPtr<nsISupports> mParent; + + nsString mSrc; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/recognition/SpeechGrammarList.cpp b/dom/media/webspeech/recognition/SpeechGrammarList.cpp new file mode 100644 index 0000000000..4317452057 --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechGrammarList.cpp @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechGrammarList.h" + +#include "mozilla/dom/SpeechGrammar.h" +#include "mozilla/dom/SpeechGrammarListBinding.h" +#include "mozilla/ErrorResult.h" +#include "nsCOMPtr.h" +#include "SpeechRecognition.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechGrammarList, mParent, mItems) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammarList) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammarList) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammarList) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechGrammarList::SpeechGrammarList(nsISupports* aParent) : mParent(aParent) {} + +SpeechGrammarList::~SpeechGrammarList() = default; + +already_AddRefed<SpeechGrammarList> SpeechGrammarList::Constructor( + const GlobalObject& aGlobal) { + RefPtr<SpeechGrammarList> speechGrammarList = + new SpeechGrammarList(aGlobal.GetAsSupports()); + return speechGrammarList.forget(); +} + +JSObject* SpeechGrammarList::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechGrammarList_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* 
SpeechGrammarList::GetParentObject() const { return mParent; } + +uint32_t SpeechGrammarList::Length() const { return mItems.Length(); } + +already_AddRefed<SpeechGrammar> SpeechGrammarList::Item(uint32_t aIndex, + ErrorResult& aRv) { + RefPtr<SpeechGrammar> result = mItems.ElementAt(aIndex); + return result.forget(); +} + +void SpeechGrammarList::AddFromURI(const nsAString& aSrc, + const Optional<float>& aWeight, + ErrorResult& aRv) { + aRv.Throw(NS_ERROR_NOT_IMPLEMENTED); +} + +void SpeechGrammarList::AddFromString(const nsAString& aString, + const Optional<float>& aWeight, + ErrorResult& aRv) { + SpeechGrammar* speechGrammar = new SpeechGrammar(mParent); + speechGrammar->SetSrc(aString, aRv); + mItems.AppendElement(speechGrammar); +} + +already_AddRefed<SpeechGrammar> SpeechGrammarList::IndexedGetter( + uint32_t aIndex, bool& aPresent, ErrorResult& aRv) { + if (aIndex >= Length()) { + aPresent = false; + return nullptr; + } + ErrorResult rv; + aPresent = true; + return Item(aIndex, rv); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/recognition/SpeechGrammarList.h b/dom/media/webspeech/recognition/SpeechGrammarList.h new file mode 100644 index 0000000000..7f1e09cd9e --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechGrammarList.h @@ -0,0 +1,73 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechGrammarList_h +#define mozilla_dom_SpeechGrammarList_h + +#include "mozilla/Attributes.h" +#include "nsCOMPtr.h" +#include "nsCycleCollectionParticipant.h" +#include "nsTArray.h" +#include "nsWrapperCache.h" + +struct JSContext; + +namespace mozilla { + +class ErrorResult; + +namespace dom { + +class GlobalObject; +class SpeechGrammar; +template <typename> +class Optional; + +class SpeechGrammarList final : public nsISupports, public nsWrapperCache { + public: + explicit SpeechGrammarList(nsISupports* aParent); + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechGrammarList) + + static already_AddRefed<SpeechGrammarList> Constructor( + const GlobalObject& aGlobal); + + static already_AddRefed<SpeechGrammarList> WebkitSpeechGrammarList( + const GlobalObject& aGlobal, ErrorResult& aRv) { + return Constructor(aGlobal); + } + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + uint32_t Length() const; + + already_AddRefed<SpeechGrammar> Item(uint32_t aIndex, ErrorResult& aRv); + + void AddFromURI(const nsAString& aSrc, const Optional<float>& aWeight, + ErrorResult& aRv); + + void AddFromString(const nsAString& aString, const Optional<float>& aWeight, + ErrorResult& aRv); + + already_AddRefed<SpeechGrammar> IndexedGetter(uint32_t aIndex, bool& aPresent, + ErrorResult& aRv); + + private: + ~SpeechGrammarList(); + + nsCOMPtr<nsISupports> mParent; + + nsTArray<RefPtr<SpeechGrammar>> mItems; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/recognition/SpeechRecognition.cpp b/dom/media/webspeech/recognition/SpeechRecognition.cpp new file mode 100644 index 0000000000..e3bf531218 --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechRecognition.cpp @@ -0,0 +1,1170 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et 
cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechRecognition.h" + +#include "nsCOMPtr.h" +#include "nsCycleCollectionParticipant.h" + +#include "mozilla/dom/AudioStreamTrack.h" +#include "mozilla/dom/BindingUtils.h" +#include "mozilla/dom/Element.h" +#include "mozilla/dom/SpeechRecognitionBinding.h" +#include "mozilla/dom/MediaStreamTrackBinding.h" +#include "mozilla/dom/MediaStreamError.h" +#include "mozilla/dom/RootedDictionary.h" +#include "mozilla/dom/SpeechGrammar.h" +#include "mozilla/MediaManager.h" +#include "mozilla/Preferences.h" +#include "mozilla/ResultVariant.h" +#include "mozilla/Services.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/AbstractThread.h" +#include "VideoUtils.h" +#include "AudioSegment.h" +#include "MediaEnginePrefs.h" +#include "endpointer.h" + +#include "mozilla/dom/SpeechRecognitionEvent.h" +#include "nsComponentManagerUtils.h" +#include "nsContentUtils.h" +#include "mozilla/dom/Document.h" +#include "nsIObserverService.h" +#include "nsIPermissionManager.h" +#include "nsIPrincipal.h" +#include "nsPIDOMWindow.h" +#include "nsServiceManagerUtils.h" +#include "nsQueryObject.h" +#include "SpeechTrackListener.h" + +#include <algorithm> + +// Undo the windows.h damage +#if defined(XP_WIN) && defined(GetMessage) +# undef GetMessage +#endif + +namespace mozilla::dom { + +#define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default" +#define DEFAULT_RECOGNITION_SERVICE "online" + +#define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length" +#define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH \ + "media.webspeech.long_silence_length" +#define PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH \ + "media.webspeech.long_speech_length" +#define PREFERENCE_SPEECH_DETECTION_TIMEOUT_MS \ + 
"media.webspeech.recognition.timeout" + +static const uint32_t kSAMPLE_RATE = 16000; + +// number of frames corresponding to 300ms of audio to send to endpointer while +// it's in environment estimation mode +// kSAMPLE_RATE frames = 1s, kESTIMATION_FRAMES frames = 300ms +static const uint32_t kESTIMATION_SAMPLES = 300 * kSAMPLE_RATE / 1000; + +LogModule* GetSpeechRecognitionLog() { + static LazyLogModule sLog("SpeechRecognition"); + return sLog; +} +#define SR_LOG(...) \ + MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__)) + +namespace { +class SpeechRecognitionShutdownBlocker : public media::ShutdownBlocker { + public: + SpeechRecognitionShutdownBlocker(SpeechRecognition* aRecognition, + const nsString& aName) + : media::ShutdownBlocker(aName), mRecognition(aRecognition) {} + + NS_IMETHOD BlockShutdown(nsIAsyncShutdownClient*) override { + MOZ_ASSERT(NS_IsMainThread()); + // AbortSilently will eventually clear the blocker. + mRecognition->Abort(); + return NS_OK; + } + + private: + const RefPtr<SpeechRecognition> mRecognition; +}; + +enum class ServiceCreationError { + ServiceNotFound, +}; + +Result<nsCOMPtr<nsISpeechRecognitionService>, ServiceCreationError> +CreateSpeechRecognitionService(nsPIDOMWindowInner* aWindow, + SpeechRecognition* aRecognition, + const nsAString& aLang) { + nsAutoCString speechRecognitionServiceCID; + + nsAutoCString prefValue; + Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_SERVICE, prefValue); + nsAutoCString speechRecognitionService; + + if (!prefValue.IsEmpty()) { + speechRecognitionService = prefValue; + } else { + speechRecognitionService = DEFAULT_RECOGNITION_SERVICE; + } + + if (StaticPrefs::media_webspeech_test_fake_recognition_service()) { + speechRecognitionServiceCID = + NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake"; + } else { + speechRecognitionServiceCID = + nsLiteralCString(NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX) + + speechRecognitionService; + } + + nsresult rv; + 
nsCOMPtr<nsISpeechRecognitionService> recognitionService; + recognitionService = + do_CreateInstance(speechRecognitionServiceCID.get(), &rv); + if (!recognitionService) { + return Err(ServiceCreationError::ServiceNotFound); + } + + return recognitionService; +} +} // namespace + +NS_IMPL_CYCLE_COLLECTION_WEAK_PTR_INHERITED(SpeechRecognition, + DOMEventTargetHelper, mStream, + mTrack, mRecognitionService, + mSpeechGrammarList) + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognition) + NS_INTERFACE_MAP_ENTRY(nsIObserver) +NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) + +NS_IMPL_ADDREF_INHERITED(SpeechRecognition, DOMEventTargetHelper) +NS_IMPL_RELEASE_INHERITED(SpeechRecognition, DOMEventTargetHelper) + +SpeechRecognition::SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow) + : DOMEventTargetHelper(aOwnerWindow), + mEndpointer(kSAMPLE_RATE), + mAudioSamplesPerChunk(mEndpointer.FrameSize()), + mSpeechDetectionTimer(NS_NewTimer()), + mSpeechGrammarList(new SpeechGrammarList(GetOwner())), + mContinuous(false), + mInterimResults(false), + mMaxAlternatives(1) { + SR_LOG("created SpeechRecognition"); + + if (StaticPrefs::media_webspeech_test_enable()) { + nsCOMPtr<nsIObserverService> obs = services::GetObserverService(); + obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false); + obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false); + } + + mEndpointer.set_speech_input_complete_silence_length( + Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 1250000)); + mEndpointer.set_long_speech_input_complete_silence_length( + Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH, 2500000)); + mEndpointer.set_long_speech_length( + Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 3 * 1000000)); + + mSpeechDetectionTimeoutMs = + Preferences::GetInt(PREFERENCE_SPEECH_DETECTION_TIMEOUT_MS, 10000); + + Reset(); +} + +SpeechRecognition::~SpeechRecognition() = default; + +bool SpeechRecognition::StateBetween(FSMState 
begin, FSMState end) { + return mCurrentState >= begin && mCurrentState <= end; +} + +void SpeechRecognition::SetState(FSMState state) { + mCurrentState = state; + SR_LOG("Transitioned to state %s", GetName(mCurrentState)); +} + +JSObject* SpeechRecognition::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechRecognition_Binding::Wrap(aCx, this, aGivenProto); +} + +bool SpeechRecognition::IsAuthorized(JSContext* aCx, JSObject* aGlobal) { + nsCOMPtr<nsIPrincipal> principal = nsContentUtils::ObjectPrincipal(aGlobal); + + nsresult rv; + nsCOMPtr<nsIPermissionManager> mgr = + do_GetService(NS_PERMISSIONMANAGER_CONTRACTID, &rv); + if (NS_WARN_IF(NS_FAILED(rv))) { + return false; + } + + uint32_t speechRecognition = nsIPermissionManager::UNKNOWN_ACTION; + rv = mgr->TestExactPermissionFromPrincipal(principal, "speech-recognition"_ns, + &speechRecognition); + if (NS_WARN_IF(NS_FAILED(rv))) { + return false; + } + + bool hasPermission = + (speechRecognition == nsIPermissionManager::ALLOW_ACTION); + + return (hasPermission || + StaticPrefs::media_webspeech_recognition_force_enable() || + StaticPrefs::media_webspeech_test_enable()) && + StaticPrefs::media_webspeech_recognition_enable(); +} + +already_AddRefed<SpeechRecognition> SpeechRecognition::Constructor( + const GlobalObject& aGlobal, ErrorResult& aRv) { + nsCOMPtr<nsPIDOMWindowInner> win = do_QueryInterface(aGlobal.GetAsSupports()); + if (!win) { + aRv.Throw(NS_ERROR_FAILURE); + return nullptr; + } + + RefPtr<SpeechRecognition> object = new SpeechRecognition(win); + return object.forget(); +} + +void SpeechRecognition::ProcessEvent(SpeechEvent* aEvent) { + SR_LOG("Processing %s, current state is %s", GetName(aEvent), + GetName(mCurrentState)); + + if (mAborted && aEvent->mType != EVENT_ABORT) { + // ignore all events while aborting + return; + } + + Transition(aEvent); +} + +void SpeechRecognition::Transition(SpeechEvent* aEvent) { + switch (mCurrentState) { + case STATE_IDLE: + switch 
(aEvent->mType) { + case EVENT_START: + // TODO: may want to time out if we wait too long + // for user to approve + WaitForAudioData(aEvent); + break; + case EVENT_STOP: + case EVENT_ABORT: + case EVENT_AUDIO_DATA: + case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + DoNothing(aEvent); + break; + case EVENT_AUDIO_ERROR: + case EVENT_RECOGNITIONSERVICE_ERROR: + AbortError(aEvent); + break; + default: + MOZ_CRASH("Invalid event"); + } + break; + case STATE_STARTING: + switch (aEvent->mType) { + case EVENT_AUDIO_DATA: + StartedAudioCapture(aEvent); + break; + case EVENT_AUDIO_ERROR: + case EVENT_RECOGNITIONSERVICE_ERROR: + AbortError(aEvent); + break; + case EVENT_ABORT: + AbortSilently(aEvent); + break; + case EVENT_STOP: + ResetAndEnd(); + break; + case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + DoNothing(aEvent); + break; + case EVENT_START: + SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent)); + MOZ_CRASH(); + default: + MOZ_CRASH("Invalid event"); + } + break; + case STATE_ESTIMATING: + switch (aEvent->mType) { + case EVENT_AUDIO_DATA: + WaitForEstimation(aEvent); + break; + case EVENT_STOP: + StopRecordingAndRecognize(aEvent); + break; + case EVENT_ABORT: + AbortSilently(aEvent); + break; + case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + case EVENT_RECOGNITIONSERVICE_ERROR: + DoNothing(aEvent); + break; + case EVENT_AUDIO_ERROR: + AbortError(aEvent); + break; + case EVENT_START: + SR_LOG("STATE_ESTIMATING: Unhandled event %d", aEvent->mType); + MOZ_CRASH(); + default: + MOZ_CRASH("Invalid event"); + } + break; + case STATE_WAITING_FOR_SPEECH: + switch (aEvent->mType) { + case EVENT_AUDIO_DATA: + DetectSpeech(aEvent); + break; + case EVENT_STOP: + StopRecordingAndRecognize(aEvent); + break; + case EVENT_ABORT: + AbortSilently(aEvent); + break; + case EVENT_AUDIO_ERROR: + AbortError(aEvent); + break; + 
case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + case EVENT_RECOGNITIONSERVICE_ERROR: + DoNothing(aEvent); + break; + case EVENT_START: + SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent)); + MOZ_CRASH(); + default: + MOZ_CRASH("Invalid event"); + } + break; + case STATE_RECOGNIZING: + switch (aEvent->mType) { + case EVENT_AUDIO_DATA: + WaitForSpeechEnd(aEvent); + break; + case EVENT_STOP: + StopRecordingAndRecognize(aEvent); + break; + case EVENT_AUDIO_ERROR: + case EVENT_RECOGNITIONSERVICE_ERROR: + AbortError(aEvent); + break; + case EVENT_ABORT: + AbortSilently(aEvent); + break; + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + DoNothing(aEvent); + break; + case EVENT_START: + SR_LOG("STATE_RECOGNIZING: Unhandled aEvent %s", GetName(aEvent)); + MOZ_CRASH(); + default: + MOZ_CRASH("Invalid event"); + } + break; + case STATE_WAITING_FOR_RESULT: + switch (aEvent->mType) { + case EVENT_STOP: + DoNothing(aEvent); + break; + case EVENT_AUDIO_ERROR: + case EVENT_RECOGNITIONSERVICE_ERROR: + AbortError(aEvent); + break; + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + NotifyFinalResult(aEvent); + break; + case EVENT_AUDIO_DATA: + DoNothing(aEvent); + break; + case EVENT_ABORT: + AbortSilently(aEvent); + break; + case EVENT_START: + case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + SR_LOG("STATE_WAITING_FOR_RESULT: Unhandled aEvent %s", + GetName(aEvent)); + MOZ_CRASH(); + default: + MOZ_CRASH("Invalid event"); + } + break; + case STATE_ABORTING: + switch (aEvent->mType) { + case EVENT_STOP: + case EVENT_ABORT: + case EVENT_AUDIO_DATA: + case EVENT_AUDIO_ERROR: + case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT: + case EVENT_RECOGNITIONSERVICE_FINAL_RESULT: + case EVENT_RECOGNITIONSERVICE_ERROR: + DoNothing(aEvent); + break; + case EVENT_START: + SR_LOG("STATE_ABORTING: Unhandled aEvent %s", GetName(aEvent)); + MOZ_CRASH(); + default: + MOZ_CRASH("Invalid 
event"); + } + break; + default: + MOZ_CRASH("Invalid state"); + } +} + +/* + * Handle a segment of recorded audio data. + * Returns the number of samples that were processed. + */ +uint32_t SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment, + TrackRate aTrackRate) { + AudioSegment::ChunkIterator iterator(*aSegment); + uint32_t samples = 0; + while (!iterator.IsEnded()) { + float out; + mEndpointer.ProcessAudio(*iterator, &out); + samples += iterator->GetDuration(); + iterator.Next(); + } + + // we need to call the nsISpeechRecognitionService::ProcessAudioSegment + // in a separate thread so that any eventual encoding or pre-processing + // of the audio does not block the main thread + nsresult rv = mEncodeTaskQueue->Dispatch( + NewRunnableMethod<StoreCopyPassByPtr<AudioSegment>, TrackRate>( + "nsISpeechRecognitionService::ProcessAudioSegment", + mRecognitionService, + &nsISpeechRecognitionService::ProcessAudioSegment, + std::move(*aSegment), aTrackRate)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + return samples; +} + +/**************************************************************************** + * FSM Transition functions + * + * If a transition function may cause a DOM event to be fired, + * it may also be re-entered, since the event handler may cause the + * event loop to spin and new SpeechEvents to be processed. + * + * Rules: + * 1) These methods should call SetState as soon as possible. + * 2) If these methods dispatch DOM events, or call methods that dispatch + * DOM events, that should be done as late as possible. + * 3) If anything must happen after dispatching a DOM event, make sure + * the state is still what the method expected it to be. + ****************************************************************************/ + +void SpeechRecognition::Reset() { + SetState(STATE_IDLE); + + // This breaks potential ref-cycles. 
  mRecognitionService = nullptr;

  // Invalidates any in-flight getUserMedia callback from Start(): those
  // callbacks compare their captured generation against mStreamGeneration
  // and bail out when it differs.
  ++mStreamGeneration;
  if (mStream) {
    mStream->UnregisterTrackListener(this);
    mStream = nullptr;
  }
  mTrack = nullptr;
  mTrackIsOwned = false;
  mStopRecordingPromise = nullptr;
  mEncodeTaskQueue = nullptr;
  mEstimationSamples = 0;
  mBufferedSamples = 0;
  mSpeechDetectionTimer->Cancel();
  mAborted = false;
}

// Resets all session state and fires the DOM "end" event.
void SpeechRecognition::ResetAndEnd() {
  Reset();
  DispatchTrustedEvent(u"end"_ns);
}

// STATE_IDLE + EVENT_START: wait for the first audio data to arrive.
void SpeechRecognition::WaitForAudioData(SpeechEvent* aEvent) {
  SetState(STATE_STARTING);
}

// First audio arrived: switch the endpointer into environment-estimation
// mode and fire "audiostart" (and "start", unless the audiostart handler
// already moved us out of STATE_ESTIMATING).
void SpeechRecognition::StartedAudioCapture(SpeechEvent* aEvent) {
  SetState(STATE_ESTIMATING);

  mEndpointer.SetEnvironmentEstimationMode();
  mEstimationSamples +=
      ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);

  DispatchTrustedEvent(u"audiostart"_ns);
  if (mCurrentState == STATE_ESTIMATING) {
    DispatchTrustedEvent(u"start"_ns);
  }
}

// Stop capturing and hand over to the service; the final result will arrive
// later as EVENT_RECOGNITIONSERVICE_FINAL_RESULT.
void SpeechRecognition::StopRecordingAndRecognize(SpeechEvent* aEvent) {
  SetState(STATE_WAITING_FOR_RESULT);

  MOZ_ASSERT(mRecognitionService, "Service deleted before recording done");

  // This will run SoundEnd on the service just before StopRecording begins
  // shutting the encode thread down.
  mSpeechListener->mRemovedPromise->Then(
      GetCurrentSerialEventTarget(), __func__,
      [service = mRecognitionService] { service->SoundEnd(); });

  StopRecording();
}

// Keep feeding the endpointer until kESTIMATION_SAMPLES (300ms) of audio has
// been used for environment estimation, then switch to user-input mode.
void SpeechRecognition::WaitForEstimation(SpeechEvent* aEvent) {
  SetState(STATE_ESTIMATING);

  mEstimationSamples +=
      ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
  if (mEstimationSamples > kESTIMATION_SAMPLES) {
    mEndpointer.SetUserInputMode();
    SetState(STATE_WAITING_FOR_SPEECH);
  }
}

// Feed audio until the endpointer reports speech; then cancel the no-speech
// timeout timer and fire "speechstart".
void SpeechRecognition::DetectSpeech(SpeechEvent* aEvent) {
  SetState(STATE_WAITING_FOR_SPEECH);

  ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
  if (mEndpointer.DidStartReceivingSpeech()) {
    mSpeechDetectionTimer->Cancel();
    SetState(STATE_RECOGNIZING);
    DispatchTrustedEvent(u"speechstart"_ns);
  }
}

// Feed audio until the endpointer reports end of speech; fire "speechend"
// and, if the handler did not change state, stop and recognize.
void SpeechRecognition::WaitForSpeechEnd(SpeechEvent* aEvent) {
  SetState(STATE_RECOGNIZING);

  ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
  if (mEndpointer.speech_input_complete()) {
    DispatchTrustedEvent(u"speechend"_ns);

    if (mCurrentState == STATE_RECOGNIZING) {
      // FIXME: StopRecordingAndRecognize should only be called for single
      // shot services for continuous we should just inform the service
      StopRecordingAndRecognize(aEvent);
    }
  }
}

// Deliver the service's final result as a DOM "result" event. Note the
// ordering: ResetAndEnd() runs first, so the "end" event fires before
// "result" is dispatched.
void SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent) {
  ResetAndEnd();

  RootedDictionary<SpeechRecognitionEventInit> init(RootingCx());
  init.mBubbles = true;
  init.mCancelable = false;
  // init.mResultIndex = 0;
  init.mResults = aEvent->mRecognitionResultList;
  init.mInterpretation = JS::NullValue();
  // init.mEmma = nullptr;

  RefPtr<SpeechRecognitionEvent> event =
      SpeechRecognitionEvent::Constructor(this, u"result"_ns, init);
  event->SetTrusted(true);

  DispatchEvent(*event);
}

void SpeechRecognition::DoNothing(SpeechEvent* aEvent) {}

// Abort the service and recording without firing an error; "end" fires once
// StopRecording's promise resolves.
void SpeechRecognition::AbortSilently(SpeechEvent* aEvent) {
  if (mRecognitionService) {
    if (mTrack) {
      // This will run Abort on the service just before StopRecording begins
      // shutting the encode thread down.
      mSpeechListener->mRemovedPromise->Then(
          GetCurrentSerialEventTarget(), __func__,
          [service = mRecognitionService] { service->Abort(); });
    } else {
      // Recording hasn't started yet. We can just call Abort().
      mRecognitionService->Abort();
    }
  }

  StopRecording()->Then(
      GetCurrentSerialEventTarget(), __func__,
      [self = RefPtr<SpeechRecognition>(this), this] { ResetAndEnd(); });

  SetState(STATE_ABORTING);
}

// Abort as above, and additionally dispatch the error event carried by
// aEvent.
void SpeechRecognition::AbortError(SpeechEvent* aEvent) {
  AbortSilently(aEvent);
  NotifyError(aEvent);
}

void SpeechRecognition::NotifyError(SpeechEvent* aEvent) {
  aEvent->mError->SetTrusted(true);

  DispatchEvent(*aEvent->mError);
}

/**************************************
 * Event triggers and other functions *
 **************************************/
// Attach our listener to the chosen audio track, install a shutdown blocker,
// start the endpointer session and arm the no-speech timeout timer (the
// timer callback is delivered through Observe()).
NS_IMETHODIMP
SpeechRecognition::StartRecording(RefPtr<AudioStreamTrack>& aTrack) {
  // hold a reference so that the underlying track doesn't get collected.
  mTrack = aTrack;
  MOZ_ASSERT(!mTrack->Ended());

  mSpeechListener = new SpeechTrackListener(this);
  mTrack->AddListener(mSpeechListener);

  nsString blockerName;
  blockerName.AppendPrintf("SpeechRecognition %p shutdown", this);
  mShutdownBlocker =
      MakeAndAddRef<SpeechRecognitionShutdownBlocker>(this, blockerName);
  media::MustGetShutdownBarrier()->AddBlocker(
      mShutdownBlocker, NS_LITERAL_STRING_FROM_CSTRING(__FILE__), __LINE__,
      u"SpeechRecognition shutdown"_ns);

  mEndpointer.StartSession();

  return mSpeechDetectionTimer->Init(this, mSpeechDetectionTimeoutMs,
                                     nsITimer::TYPE_ONE_SHOT);
}

// Tear down recording. Idempotent: a second call returns the cached
// mStopRecordingPromise. Fires "audioend", shuts down the encode task queue
// once the track listener is gone, then removes the shutdown blocker.
RefPtr<GenericNonExclusivePromise> SpeechRecognition::StopRecording() {
  if (!mTrack) {
    // Recording wasn't started, or has already been stopped.
    if (mStream) {
      // Ensure we don't start recording because a track became available
      // before we get reset.
      mStream->UnregisterTrackListener(this);
    }
    return GenericNonExclusivePromise::CreateAndResolve(true, __func__);
  }

  if (mStopRecordingPromise) {
    return mStopRecordingPromise;
  }

  mTrack->RemoveListener(mSpeechListener);
  if (mTrackIsOwned) {
    mTrack->Stop();
  }

  mEndpointer.EndSession();
  DispatchTrustedEvent(u"audioend"_ns);

  // Block shutdown until the speech track listener has been removed from the
  // MSG, as it holds a reference to us, and we reference the world, which we
  // don't want to leak.
  mStopRecordingPromise =
      mSpeechListener->mRemovedPromise
          ->Then(
              GetCurrentSerialEventTarget(), __func__,
              [self = RefPtr<SpeechRecognition>(this), this] {
                SR_LOG("Shutting down encoding thread");
                return mEncodeTaskQueue->BeginShutdown();
              },
              [] {
                MOZ_CRASH("Unexpected rejection");
                return ShutdownPromise::CreateAndResolve(false, __func__);
              })
          ->Then(
              GetCurrentSerialEventTarget(), __func__,
              [self = RefPtr<SpeechRecognition>(this), this] {
                media::MustGetShutdownBarrier()->RemoveBlocker(
                    mShutdownBlocker);
                mShutdownBlocker = nullptr;

                MOZ_DIAGNOSTIC_ASSERT(mCurrentState != STATE_IDLE);
                return GenericNonExclusivePromise::CreateAndResolve(true,
                                                                    __func__);
              },
              [] {
                MOZ_CRASH("Unexpected rejection");
                return GenericNonExclusivePromise::CreateAndResolve(false,
                                                                    __func__);
              });
  return mStopRecordingPromise;
}

// nsIObserver: handles the no-speech timeout timer and the test-only topics
// registered in the constructor.
NS_IMETHODIMP
SpeechRecognition::Observe(nsISupports* aSubject, const char* aTopic,
                           const char16_t* aData) {
  MOZ_ASSERT(NS_IsMainThread(), "Observer invoked off the main thread");

  if (!strcmp(aTopic, NS_TIMER_CALLBACK_TOPIC) &&
      StateBetween(STATE_IDLE, STATE_WAITING_FOR_SPEECH)) {
    // The timer only means "no speech" if we never got past
    // STATE_WAITING_FOR_SPEECH; later states cancel it in DetectSpeech.
    DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR,
                  SpeechRecognitionErrorCode::No_speech,
                  "No speech detected (timeout)");
  } else if (!strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC)) {
    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC);
    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC);
  } else if (StaticPrefs::media_webspeech_test_fake_fsm_events() &&
             !strcmp(aTopic, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC)) {
    ProcessTestEventRequest(aSubject, nsDependentString(aData));
  }

  return NS_OK;
}

// Test-only: injects fake FSM events requested via the observer topic.
void SpeechRecognition::ProcessTestEventRequest(nsISupports* aSubject,
                                                const nsAString& aEventName) {
  if (aEventName.EqualsLiteral("EVENT_ABORT")) {
    Abort();
  } else if (aEventName.EqualsLiteral("EVENT_AUDIO_ERROR")) {
    DispatchError(
        SpeechRecognition::EVENT_AUDIO_ERROR,
        SpeechRecognitionErrorCode::Audio_capture,  // TODO different codes?
        "AUDIO_ERROR test event");
  } else {
    NS_ASSERTION(StaticPrefs::media_webspeech_test_fake_recognition_service(),
                 "Got request for fake recognition service event, but "
                 "media.webspeech.test.fake_recognition_service is unset");

    // let the fake recognition service handle the request
  }
}

// --- WebIDL attribute accessors ---

already_AddRefed<SpeechGrammarList> SpeechRecognition::Grammars() const {
  RefPtr<SpeechGrammarList> speechGrammarList = mSpeechGrammarList;
  return speechGrammarList.forget();
}

void SpeechRecognition::SetGrammars(SpeechGrammarList& aArg) {
  mSpeechGrammarList = &aArg;
}

void SpeechRecognition::GetLang(nsString& aRetVal) const { aRetVal = mLang; }

void SpeechRecognition::SetLang(const nsAString& aArg) { mLang = aArg; }

bool SpeechRecognition::GetContinuous(ErrorResult& aRv) const {
  return mContinuous;
}

void SpeechRecognition::SetContinuous(bool aArg, ErrorResult& aRv) {
  mContinuous = aArg;
}

bool SpeechRecognition::InterimResults() const { return mInterimResults; }

void SpeechRecognition::SetInterimResults(bool aArg) { mInterimResults = aArg; }

uint32_t SpeechRecognition::MaxAlternatives() const { return mMaxAlternatives; }

void SpeechRecognition::SetMaxAlternatives(uint32_t aArg) {
  mMaxAlternatives = aArg;
}

void
SpeechRecognition::GetServiceURI(nsString& aRetVal, + ErrorResult& aRv) const { + aRv.Throw(NS_ERROR_NOT_IMPLEMENTED); +} + +void SpeechRecognition::SetServiceURI(const nsAString& aArg, ErrorResult& aRv) { + aRv.Throw(NS_ERROR_NOT_IMPLEMENTED); +} + +void SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream, + CallerType aCallerType, ErrorResult& aRv) { + if (mCurrentState != STATE_IDLE) { + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + return; + } + + if (!SetRecognitionService(aRv)) { + return; + } + + if (!ValidateAndSetGrammarList(aRv)) { + return; + } + + mEncodeTaskQueue = + TaskQueue::Create(GetMediaThreadPool(MediaThreadType::WEBRTC_WORKER), + "WebSpeechEncoderThread"); + + nsresult rv; + rv = mRecognitionService->Initialize(this); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + MediaStreamConstraints constraints; + constraints.mAudio.SetAsBoolean() = true; + + if (aStream.WasPassed()) { + mStream = &aStream.Value(); + mTrackIsOwned = false; + mStream->RegisterTrackListener(this); + nsTArray<RefPtr<AudioStreamTrack>> tracks; + mStream->GetAudioTracks(tracks); + for (const RefPtr<AudioStreamTrack>& track : tracks) { + if (!track->Ended()) { + NotifyTrackAdded(track); + break; + } + } + } else { + mTrackIsOwned = true; + nsPIDOMWindowInner* win = GetOwner(); + if (!win || !win->IsFullyActive()) { + aRv.ThrowInvalidStateError("The document is not fully active."); + return; + } + AutoNoJSAPI nojsapi; + RefPtr<SpeechRecognition> self(this); + MediaManager::Get() + ->GetUserMedia(win, constraints, aCallerType) + ->Then( + GetCurrentSerialEventTarget(), __func__, + [this, self, + generation = mStreamGeneration](RefPtr<DOMMediaStream>&& aStream) { + nsTArray<RefPtr<AudioStreamTrack>> tracks; + aStream->GetAudioTracks(tracks); + if (mAborted || mCurrentState != STATE_STARTING || + mStreamGeneration != generation) { + // We were probably aborted. Exit early. 
+ for (const RefPtr<AudioStreamTrack>& track : tracks) { + track->Stop(); + } + return; + } + mStream = std::move(aStream); + mStream->RegisterTrackListener(this); + for (const RefPtr<AudioStreamTrack>& track : tracks) { + if (!track->Ended()) { + NotifyTrackAdded(track); + } + } + }, + [this, self, + generation = mStreamGeneration](RefPtr<MediaMgrError>&& error) { + if (mAborted || mCurrentState != STATE_STARTING || + mStreamGeneration != generation) { + // We were probably aborted. Exit early. + return; + } + SpeechRecognitionErrorCode errorCode; + + if (error->mName == MediaMgrError::Name::NotAllowedError) { + errorCode = SpeechRecognitionErrorCode::Not_allowed; + } else { + errorCode = SpeechRecognitionErrorCode::Audio_capture; + } + DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode, + error->mMessage); + }); + } + + RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_START); + NS_DispatchToMainThread(event); +} + +bool SpeechRecognition::SetRecognitionService(ErrorResult& aRv) { + if (!GetOwner()) { + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + return false; + } + + // See: + // https://dvcs.w3.org/hg/speech-api/raw-file/tip/webspeechapi.html#dfn-lang + nsAutoString lang; + if (!mLang.IsEmpty()) { + lang = mLang; + } else { + nsCOMPtr<Document> document = GetOwner()->GetExtantDoc(); + if (!document) { + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + return false; + } + nsCOMPtr<Element> element = document->GetRootElement(); + if (!element) { + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + return false; + } + + nsAutoString lang; + element->GetLang(lang); + } + + auto result = CreateSpeechRecognitionService(GetOwner(), this, lang); + + if (result.isErr()) { + switch (result.unwrapErr()) { + case ServiceCreationError::ServiceNotFound: + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + break; + default: + MOZ_CRASH("Unknown error"); + } + return false; + } + + mRecognitionService = result.unwrap(); + MOZ_DIAGNOSTIC_ASSERT(mRecognitionService); + return 
true; +} + +bool SpeechRecognition::ValidateAndSetGrammarList(ErrorResult& aRv) { + if (!mSpeechGrammarList) { + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + return false; + } + + uint32_t grammarListLength = mSpeechGrammarList->Length(); + for (uint32_t count = 0; count < grammarListLength; ++count) { + RefPtr<SpeechGrammar> speechGrammar = mSpeechGrammarList->Item(count, aRv); + if (aRv.Failed()) { + return false; + } + if (NS_FAILED(mRecognitionService->ValidateAndSetGrammarList( + speechGrammar.get(), nullptr))) { + aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR); + return false; + } + } + + return true; +} + +void SpeechRecognition::Stop() { + RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_STOP); + NS_DispatchToMainThread(event); +} + +void SpeechRecognition::Abort() { + if (mAborted) { + return; + } + + mAborted = true; + + RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_ABORT); + NS_DispatchToMainThread(event); +} + +void SpeechRecognition::NotifyTrackAdded( + const RefPtr<MediaStreamTrack>& aTrack) { + if (mTrack) { + return; + } + + RefPtr<AudioStreamTrack> audioTrack = aTrack->AsAudioStreamTrack(); + if (!audioTrack) { + return; + } + + if (audioTrack->Ended()) { + return; + } + + StartRecording(audioTrack); +} + +void SpeechRecognition::DispatchError(EventType aErrorType, + SpeechRecognitionErrorCode aErrorCode, + const nsACString& aMessage) { + MOZ_ASSERT(NS_IsMainThread()); + MOZ_ASSERT(aErrorType == EVENT_RECOGNITIONSERVICE_ERROR || + aErrorType == EVENT_AUDIO_ERROR, + "Invalid error type!"); + + RefPtr<SpeechRecognitionError> srError = + new SpeechRecognitionError(nullptr, nullptr, nullptr); + + srError->InitSpeechRecognitionError(u"error"_ns, true, false, aErrorCode, + aMessage); + + RefPtr<SpeechEvent> event = new SpeechEvent(this, aErrorType); + event->mError = srError; + NS_DispatchToMainThread(event); +} + +/* + * Buffer audio samples into mAudioSamplesBuffer until aBufferSize. 
 * Updates mBufferedSamples and returns the number of samples that were
 * buffered.
 */
uint32_t SpeechRecognition::FillSamplesBuffer(const int16_t* aSamples,
                                              uint32_t aSampleCount) {
  MOZ_ASSERT(mBufferedSamples < mAudioSamplesPerChunk);
  MOZ_ASSERT(mAudioSamplesBuffer);

  int16_t* samplesBuffer = static_cast<int16_t*>(mAudioSamplesBuffer->Data());
  // Copy at most the free space left in the chunk buffer.
  size_t samplesToCopy =
      std::min(aSampleCount, mAudioSamplesPerChunk - mBufferedSamples);

  PodCopy(samplesBuffer + mBufferedSamples, aSamples, samplesToCopy);

  mBufferedSamples += samplesToCopy;
  return samplesToCopy;
}

/*
 * Split a samples buffer starting of a given size into
 * chunks of equal size. The chunks are stored in the array
 * received as argument.
 * Returns the offset of the end of the last chunk that was
 * created.
 */
uint32_t SpeechRecognition::SplitSamplesBuffer(
    const int16_t* aSamplesBuffer, uint32_t aSampleCount,
    nsTArray<RefPtr<SharedBuffer>>& aResult) {
  uint32_t chunkStart = 0;

  // Only full chunks of mAudioSamplesPerChunk samples are produced; a
  // trailing partial chunk is left for the caller to buffer.
  while (chunkStart + mAudioSamplesPerChunk <= aSampleCount) {
    CheckedInt<size_t> bufferSize(sizeof(int16_t));
    bufferSize *= mAudioSamplesPerChunk;
    RefPtr<SharedBuffer> chunk = SharedBuffer::Create(bufferSize);

    PodCopy(static_cast<short*>(chunk->Data()), aSamplesBuffer + chunkStart,
            mAudioSamplesPerChunk);

    aResult.AppendElement(chunk.forget());
    chunkStart += mAudioSamplesPerChunk;
  }

  return chunkStart;
}

// Wraps each fixed-size chunk as one mono frame run in a fresh AudioSegment.
// Caller takes ownership of the returned segment.
AudioSegment* SpeechRecognition::CreateAudioSegment(
    nsTArray<RefPtr<SharedBuffer>>& aChunks) {
  AudioSegment* segment = new AudioSegment();
  for (uint32_t i = 0; i < aChunks.Length(); ++i) {
    RefPtr<SharedBuffer> buffer = aChunks[i];
    const int16_t* chunkData = static_cast<const int16_t*>(buffer->Data());

    AutoTArray<const int16_t*, 1> channels;
    channels.AppendElement(chunkData);
    segment->AppendFrames(buffer.forget(), channels, mAudioSamplesPerChunk,
                          PRINCIPAL_HANDLE_NONE);
  }

  return segment;
}

// Re-chunks incoming int16 audio into mAudioSamplesPerChunk-sized pieces and
// posts them to the main thread as an EVENT_AUDIO_DATA SpeechEvent. Runs off
// the main thread (asserted below).
void SpeechRecognition::FeedAudioData(
    nsMainThreadPtrHandle<SpeechRecognition>& aRecognition,
    already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration,
    MediaTrackListener* aProvider, TrackRate aTrackRate) {
  NS_ASSERTION(!NS_IsMainThread(),
               "FeedAudioData should not be called in the main thread");

  // Endpointer expects to receive samples in chunks whose size is a
  // multiple of its frame size.
  // Since we can't assume we will receive the frames in appropriate-sized
  // chunks, we must buffer and split them in chunks of mAudioSamplesPerChunk
  // (a multiple of Endpointer's frame size) before feeding to Endpointer.

  // ensure aSamples is deleted
  RefPtr<SharedBuffer> refSamples = aSamples;

  uint32_t samplesIndex = 0;
  const int16_t* samples = static_cast<int16_t*>(refSamples->Data());
  AutoTArray<RefPtr<SharedBuffer>, 5> chunksToSend;

  // fill up our buffer and make a chunk out of it, if possible
  if (mBufferedSamples > 0) {
    samplesIndex += FillSamplesBuffer(samples, aDuration);

    if (mBufferedSamples == mAudioSamplesPerChunk) {
      chunksToSend.AppendElement(mAudioSamplesBuffer.forget());
      mBufferedSamples = 0;
    }
  }

  // create sample chunks of correct size
  if (samplesIndex < aDuration) {
    samplesIndex += SplitSamplesBuffer(samples + samplesIndex,
                                       aDuration - samplesIndex, chunksToSend);
  }

  // buffer remaining samples
  if (samplesIndex < aDuration) {
    mBufferedSamples = 0;
    CheckedInt<size_t> bufferSize(sizeof(int16_t));
    bufferSize *= mAudioSamplesPerChunk;
    mAudioSamplesBuffer = SharedBuffer::Create(bufferSize);

    FillSamplesBuffer(samples + samplesIndex, aDuration - samplesIndex);
  }

  AudioSegment* segment = CreateAudioSegment(chunksToSend);
  RefPtr<SpeechEvent> event = new SpeechEvent(aRecognition, EVENT_AUDIO_DATA);
  event->mAudioSegment = segment;
  event->mProvider = aProvider;
  event->mTrackRate = aTrackRate;
  NS_DispatchToMainThread(event);
}

// Debug-log helper: FSM state id -> name.
const char* SpeechRecognition::GetName(FSMState
aId) {
  // Order must match the FSMState enum declaration.
  static const char* names[] = {
      "STATE_IDLE",        "STATE_STARTING",
      "STATE_ESTIMATING",  "STATE_WAITING_FOR_SPEECH",
      "STATE_RECOGNIZING", "STATE_WAITING_FOR_RESULT",
      "STATE_ABORTING",
  };

  MOZ_ASSERT(aId < STATE_COUNT);
  MOZ_ASSERT(ArrayLength(names) == STATE_COUNT);
  return names[aId];
}

// Debug-log helper: event type -> name. Order must match the EventType enum.
const char* SpeechRecognition::GetName(SpeechEvent* aEvent) {
  static const char* names[] = {"EVENT_START",
                                "EVENT_STOP",
                                "EVENT_ABORT",
                                "EVENT_AUDIO_DATA",
                                "EVENT_AUDIO_ERROR",
                                "EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT",
                                "EVENT_RECOGNITIONSERVICE_FINAL_RESULT",
                                "EVENT_RECOGNITIONSERVICE_ERROR"};

  MOZ_ASSERT(aEvent->mType < EVENT_COUNT);
  MOZ_ASSERT(ArrayLength(names) == EVENT_COUNT);
  return names[aEvent->mType];
}

// Exposes the encode task queue created in Start(); main thread only.
TaskQueue* SpeechRecognition::GetTaskQueueForEncoding() const {
  MOZ_ASSERT(NS_IsMainThread());
  return mEncodeTaskQueue;
}

// Runnable carrying one FSM event (plus optional audio segment / result /
// error payload) to the main thread.
SpeechEvent::SpeechEvent(SpeechRecognition* aRecognition,
                         SpeechRecognition::EventType aType)
    : Runnable("dom::SpeechEvent"),
      mAudioSegment(nullptr),
      mRecognitionResultList(nullptr),
      mError(nullptr),
      mRecognition(new nsMainThreadPtrHolder<SpeechRecognition>(
          "SpeechEvent::SpeechEvent", aRecognition)),
      mType(aType),
      mTrackRate(0) {}

// Overload used off-main-thread (FeedAudioData), where the recognition is
// already wrapped in a main-thread-only handle.
SpeechEvent::SpeechEvent(nsMainThreadPtrHandle<SpeechRecognition>& aRecognition,
                         SpeechRecognition::EventType aType)
    : Runnable("dom::SpeechEvent"),
      mAudioSegment(nullptr),
      mRecognitionResultList(nullptr),
      mError(nullptr),
      mRecognition(aRecognition),
      mType(aType),
      mTrackRate(0) {}

// mAudioSegment is raw-owned by the event; see CreateAudioSegment.
SpeechEvent::~SpeechEvent() { delete mAudioSegment; }

NS_IMETHODIMP
SpeechEvent::Run() {
  mRecognition->ProcessEvent(this);
  return NS_OK;
}

}  // namespace mozilla::dom
diff --git a/dom/media/webspeech/recognition/SpeechRecognition.h b/dom/media/webspeech/recognition/SpeechRecognition.h
new file mode 100644
index 0000000000..687f38041e
--- /dev/null
+++ b/dom/media/webspeech/recognition/SpeechRecognition.h
@@ -0,0 +1,314 @@
/* -*- Mode:
C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechRecognition_h +#define mozilla_dom_SpeechRecognition_h + +#include "mozilla/Attributes.h" +#include "mozilla/DOMEventTargetHelper.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsWrapperCache.h" +#include "nsTArray.h" +#include "js/TypeDecls.h" +#include "nsProxyRelease.h" +#include "DOMMediaStream.h" +#include "nsITimer.h" +#include "MediaTrackGraph.h" +#include "AudioSegment.h" +#include "mozilla/WeakPtr.h" + +#include "SpeechGrammarList.h" +#include "SpeechRecognitionResultList.h" +#include "nsISpeechRecognitionService.h" +#include "endpointer.h" + +#include "mozilla/dom/BindingDeclarations.h" +#include "mozilla/dom/SpeechRecognitionError.h" + +namespace mozilla { + +namespace media { +class ShutdownBlocker; +} + +namespace dom { + +#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC \ + "SpeechRecognitionTest:RequestEvent" +#define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End" + +class GlobalObject; +class AudioStreamTrack; +class SpeechEvent; +class SpeechTrackListener; + +LogModule* GetSpeechRecognitionLog(); +#define SR_LOG(...) 
\ + MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__)) + +class SpeechRecognition final : public DOMEventTargetHelper, + public nsIObserver, + public DOMMediaStream::TrackListener, + public SupportsWeakPtr { + public: + explicit SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow); + + NS_DECL_ISUPPORTS_INHERITED + NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechRecognition, + DOMEventTargetHelper) + + NS_DECL_NSIOBSERVER + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + static bool IsAuthorized(JSContext* aCx, JSObject* aGlobal); + + static already_AddRefed<SpeechRecognition> Constructor( + const GlobalObject& aGlobal, ErrorResult& aRv); + + static already_AddRefed<SpeechRecognition> WebkitSpeechRecognition( + const GlobalObject& aGlobal, ErrorResult& aRv) { + return Constructor(aGlobal, aRv); + } + + already_AddRefed<SpeechGrammarList> Grammars() const; + + void SetGrammars(mozilla::dom::SpeechGrammarList& aArg); + + void GetLang(nsString& aRetVal) const; + + void SetLang(const nsAString& aArg); + + bool GetContinuous(ErrorResult& aRv) const; + + void SetContinuous(bool aArg, ErrorResult& aRv); + + bool InterimResults() const; + + void SetInterimResults(bool aArg); + + uint32_t MaxAlternatives() const; + + TaskQueue* GetTaskQueueForEncoding() const; + + void SetMaxAlternatives(uint32_t aArg); + + void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const; + + void SetServiceURI(const nsAString& aArg, ErrorResult& aRv); + + void Start(const Optional<NonNull<DOMMediaStream>>& aStream, + CallerType aCallerType, ErrorResult& aRv); + + void Stop(); + + void Abort(); + + IMPL_EVENT_HANDLER(audiostart) + IMPL_EVENT_HANDLER(soundstart) + IMPL_EVENT_HANDLER(speechstart) + IMPL_EVENT_HANDLER(speechend) + IMPL_EVENT_HANDLER(soundend) + IMPL_EVENT_HANDLER(audioend) + IMPL_EVENT_HANDLER(result) + IMPL_EVENT_HANDLER(nomatch) + IMPL_EVENT_HANDLER(error) + IMPL_EVENT_HANDLER(start) + IMPL_EVENT_HANDLER(end) + + 
  // FSM input events. Each is carried to the main thread as a SpeechEvent
  // runnable and consumed by ProcessEvent()/Transition().
  enum EventType {
    EVENT_START,
    EVENT_STOP,
    EVENT_ABORT,
    EVENT_AUDIO_DATA,
    EVENT_AUDIO_ERROR,
    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
    EVENT_RECOGNITIONSERVICE_ERROR,
    EVENT_COUNT
  };

  void NotifyTrackAdded(const RefPtr<MediaStreamTrack>& aTrack) override;
  // aMessage should be valid UTF-8, but invalid UTF-8 byte sequences are
  // replaced with the REPLACEMENT CHARACTER on conversion to UTF-16.
  void DispatchError(EventType aErrorType,
                     SpeechRecognitionErrorCode aErrorCode,
                     const nsACString& aMessage);
  // Convenience overload for string literals; forwards to the nsACString
  // overload via nsLiteralCString (no intermediate heap copy).
  template <int N>
  void DispatchError(EventType aErrorType,
                     SpeechRecognitionErrorCode aErrorCode,
                     const char (&aMessage)[N]) {
    DispatchError(aErrorType, aErrorCode, nsLiteralCString(aMessage));
  }
  uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
  uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer,
                              uint32_t aSampleCount,
                              nsTArray<RefPtr<SharedBuffer>>& aResult);
  AudioSegment* CreateAudioSegment(nsTArray<RefPtr<SharedBuffer>>& aChunks);
  void FeedAudioData(nsMainThreadPtrHandle<SpeechRecognition>& aRecognition,
                     already_AddRefed<SharedBuffer> aSamples,
                     uint32_t aDuration, MediaTrackListener* aProvider,
                     TrackRate aTrackRate);

  friend class SpeechEvent;

 private:
  virtual ~SpeechRecognition();

  // States of the recognition-session finite state machine.
  enum FSMState {
    STATE_IDLE,
    STATE_STARTING,
    STATE_ESTIMATING,
    STATE_WAITING_FOR_SPEECH,
    STATE_RECOGNIZING,
    STATE_WAITING_FOR_RESULT,
    STATE_ABORTING,
    STATE_COUNT
  };

  void SetState(FSMState state);
  bool StateBetween(FSMState begin, FSMState end);

  bool SetRecognitionService(ErrorResult& aRv);
  bool ValidateAndSetGrammarList(ErrorResult& aRv);

  NS_IMETHOD StartRecording(RefPtr<AudioStreamTrack>& aDOMStream);
  RefPtr<GenericNonExclusivePromise> StopRecording();

  uint32_t ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate);
  void NotifyError(SpeechEvent* aEvent);

  // Entry points for FSM event handling; the per-state handlers below are
  // dispatched from here for the matching EventType/FSMState combination.
  void ProcessEvent(SpeechEvent* aEvent);
  void Transition(SpeechEvent* aEvent);

  void Reset();
  void ResetAndEnd();
  void WaitForAudioData(SpeechEvent* aEvent);
  void StartedAudioCapture(SpeechEvent* aEvent);
  void StopRecordingAndRecognize(SpeechEvent* aEvent);
  void WaitForEstimation(SpeechEvent* aEvent);
  void DetectSpeech(SpeechEvent* aEvent);
  void WaitForSpeechEnd(SpeechEvent* aEvent);
  void NotifyFinalResult(SpeechEvent* aEvent);
  void DoNothing(SpeechEvent* aEvent);
  void AbortSilently(SpeechEvent* aEvent);
  void AbortError(SpeechEvent* aEvent);

  RefPtr<DOMMediaStream> mStream;
  RefPtr<AudioStreamTrack> mTrack;
  bool mTrackIsOwned = false;
  RefPtr<GenericNonExclusivePromise> mStopRecordingPromise;
  RefPtr<SpeechTrackListener> mSpeechListener;
  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;
  RefPtr<media::ShutdownBlocker> mShutdownBlocker;
  // TaskQueue responsible for pre-processing the samples by the service
  // it runs in a separate thread from the main thread
  RefPtr<TaskQueue> mEncodeTaskQueue;

  // A generation ID of the MediaStream a started session is for, so that
  // a gUM request that resolves after the session has stopped, and a new
  // one has started, can exit early. Main thread only. Can wrap.
  uint8_t mStreamGeneration = 0;

  FSMState mCurrentState;

  Endpointer mEndpointer;
  uint32_t mEstimationSamples;

  uint32_t mAudioSamplesPerChunk;

  // maximum amount of seconds the engine will wait for voice
  // until returning a 'no speech detected' error
  // NOTE(review): name and type say milliseconds; the comment above says
  // seconds — confirm the unit against the .cpp before relying on it.
  uint32_t mSpeechDetectionTimeoutMs;

  // buffer holds one chunk of mAudioSamplesPerChunk
  // samples before feeding it to mEndpointer
  RefPtr<SharedBuffer> mAudioSamplesBuffer;
  uint32_t mBufferedSamples;

  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
  bool mAborted;

  // BCP 47 language tag exposed via GetLang()/SetLang().
  nsString mLang;

  RefPtr<SpeechGrammarList> mSpeechGrammarList;

  // private flag used to hold if the user called the setContinuous() method
  // of the API
  bool mContinuous;

  // WebSpeechAPI (http://bit.ly/1gIl7DC) states:
  //
  // 1. Default value MUST be false
  // 2. If true, interim results SHOULD be returned
  // 3. If false, interim results MUST NOT be returned
  //
  // Pocketsphinx does not return interm results; so, defaulting
  // mInterimResults to false, then ignoring its subsequent value
  // is a conforming implementation.
  bool mInterimResults;

  // WebSpeechAPI (http://bit.ly/1JAiqeo) states:
  //
  // 1. Default value is 1
  // 2. Subsequent value is the "maximum number of SpeechRecognitionAlternatives
  // per result"
  //
  // Pocketsphinx can only return at maximum a single
  // SpeechRecognitionAlternative per SpeechRecognitionResult. So defaulting
  // mMaxAlternatives to 1, for all non zero values ignoring mMaxAlternatives
  // while for a 0 value returning no SpeechRecognitionAlternative per result is
  // a conforming implementation.
  uint32_t mMaxAlternatives;

  // Reacts to observer-service notifications carrying test event names
  // (presumably used by automated tests to inject fake events/audio —
  // confirm against the Observe() implementation in the .cpp).
  void ProcessTestEventRequest(nsISupports* aSubject,
                               const nsAString& aEventName);

  // Human-readable names for logging of states and events.
  const char* GetName(FSMState aId);
  const char* GetName(SpeechEvent* aEvent);
};

// Runnable that carries one EventType — plus optional audio data, result
// list or error payload — to the main thread, where the befriended
// SpeechRecognition processes it.
class SpeechEvent : public Runnable {
 public:
  SpeechEvent(SpeechRecognition* aRecognition,
              SpeechRecognition::EventType aType);
  SpeechEvent(nsMainThreadPtrHandle<SpeechRecognition>& aRecognition,
              SpeechRecognition::EventType aType);

  ~SpeechEvent();

  NS_IMETHOD Run() override;
  AudioSegment* mAudioSegment;
  RefPtr<SpeechRecognitionResultList>
      mRecognitionResultList;  // TODO: make this a session being passed which
                               // also has index and stuff
  RefPtr<SpeechRecognitionError> mError;

  friend class SpeechRecognition;

 private:
  nsMainThreadPtrHandle<SpeechRecognition> mRecognition;

  // for AUDIO_DATA events, keep a reference to the provider
  // of the data (i.e., the SpeechTrackListener) to ensure it
  // is kept alive (and keeps SpeechRecognition alive) until this
  // event gets processed.
  RefPtr<MediaTrackListener> mProvider;
  SpeechRecognition::EventType mType;
  TrackRate mTrackRate;
};

}  // namespace dom

// Disambiguates nsISupports conversion across the multiple XPCOM bases.
inline nsISupports* ToSupports(dom::SpeechRecognition* aRec) {
  return ToSupports(static_cast<DOMEventTargetHelper*>(aRec));
}

}  // namespace mozilla

#endif
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "SpeechRecognitionAlternative.h" + +#include "mozilla/dom/SpeechRecognitionAlternativeBinding.h" + +#include "SpeechRecognition.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechRecognitionAlternative, mParent) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionAlternative) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionAlternative) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionAlternative) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechRecognitionAlternative::SpeechRecognitionAlternative( + SpeechRecognition* aParent) + : mConfidence(0), mParent(aParent) {} + +SpeechRecognitionAlternative::~SpeechRecognitionAlternative() = default; + +JSObject* SpeechRecognitionAlternative::WrapObject( + JSContext* aCx, JS::Handle<JSObject*> aGivenProto) { + return SpeechRecognitionAlternative_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* SpeechRecognitionAlternative::GetParentObject() const { + return static_cast<EventTarget*>(mParent.get()); +} + +void SpeechRecognitionAlternative::GetTranscript(nsString& aRetVal) const { + aRetVal = mTranscript; +} + +float SpeechRecognitionAlternative::Confidence() const { return mConfidence; } + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/recognition/SpeechRecognitionAlternative.h b/dom/media/webspeech/recognition/SpeechRecognitionAlternative.h new file mode 100644 index 0000000000..017d869943 --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechRecognitionAlternative.h @@ -0,0 +1,49 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/

#ifndef mozilla_dom_SpeechRecognitionAlternative_h
#define mozilla_dom_SpeechRecognitionAlternative_h

#include "nsCycleCollectionParticipant.h"
#include "nsString.h"
#include "nsWrapperCache.h"
#include "js/TypeDecls.h"

#include "mozilla/Attributes.h"

namespace mozilla::dom {

class SpeechRecognition;

// DOM SpeechRecognitionAlternative: one (transcript, confidence) pair.
// Stored by SpeechRecognitionResult and parented to the originating
// SpeechRecognition for wrapper bookkeeping.
class SpeechRecognitionAlternative final : public nsISupports,
                                           public nsWrapperCache {
 public:
  explicit SpeechRecognitionAlternative(SpeechRecognition* aParent);

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechRecognitionAlternative)

  nsISupports* GetParentObject() const;

  JSObject* WrapObject(JSContext* aCx,
                       JS::Handle<JSObject*> aGivenProto) override;

  // WebIDL: copies the recognized text into aRetVal.
  void GetTranscript(nsString& aRetVal) const;

  // WebIDL: confidence score; 0 after construction.
  float Confidence() const;

  // Backing fields; public so collaborating code can fill them in directly
  // (there are no setters).
  nsString mTranscript;
  float mConfidence;

 private:
  ~SpeechRecognitionAlternative();

  RefPtr<SpeechRecognition> mParent;
};

}  // namespace mozilla::dom

#endif
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "SpeechRecognitionResult.h" +#include "mozilla/dom/SpeechRecognitionResultBinding.h" + +#include "SpeechRecognition.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechRecognitionResult, mParent) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionResult) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionResult) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionResult) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechRecognitionResult::SpeechRecognitionResult(SpeechRecognition* aParent) + : mParent(aParent) {} + +SpeechRecognitionResult::~SpeechRecognitionResult() = default; + +JSObject* SpeechRecognitionResult::WrapObject( + JSContext* aCx, JS::Handle<JSObject*> aGivenProto) { + return SpeechRecognitionResult_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* SpeechRecognitionResult::GetParentObject() const { + return static_cast<EventTarget*>(mParent.get()); +} + +already_AddRefed<SpeechRecognitionAlternative> +SpeechRecognitionResult::IndexedGetter(uint32_t aIndex, bool& aPresent) { + if (aIndex >= Length()) { + aPresent = false; + return nullptr; + } + + aPresent = true; + return Item(aIndex); +} + +uint32_t SpeechRecognitionResult::Length() const { return mItems.Length(); } + +already_AddRefed<SpeechRecognitionAlternative> SpeechRecognitionResult::Item( + uint32_t aIndex) { + RefPtr<SpeechRecognitionAlternative> alternative = mItems.ElementAt(aIndex); + return alternative.forget(); +} + +bool SpeechRecognitionResult::IsFinal() const { + return true; // TODO +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/recognition/SpeechRecognitionResult.h b/dom/media/webspeech/recognition/SpeechRecognitionResult.h new file mode 100644 index 0000000000..fc9e8fd660 --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechRecognitionResult.h @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* 
vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_SpeechRecognitionResult_h
#define mozilla_dom_SpeechRecognitionResult_h

#include "nsCOMPtr.h"
#include "nsCycleCollectionParticipant.h"
#include "nsWrapperCache.h"
#include "nsTArray.h"
#include "js/TypeDecls.h"

#include "mozilla/Attributes.h"

#include "SpeechRecognitionAlternative.h"

namespace mozilla::dom {

// DOM SpeechRecognitionResult: array-like collection of
// SpeechRecognitionAlternative objects for one recognized utterance,
// parented to the originating SpeechRecognition.
class SpeechRecognitionResult final : public nsISupports,
                                      public nsWrapperCache {
 public:
  explicit SpeechRecognitionResult(SpeechRecognition* aParent);

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechRecognitionResult)

  nsISupports* GetParentObject() const;

  JSObject* WrapObject(JSContext* aCx,
                       JS::Handle<JSObject*> aGivenProto) override;

  // Number of stored alternatives.
  uint32_t Length() const;

  // Unchecked access; aIndex must be < Length().
  already_AddRefed<SpeechRecognitionAlternative> Item(uint32_t aIndex);

  // Whether this result is final. Currently hard-coded true (see .cpp TODO).
  bool IsFinal() const;

  // WebIDL indexed getter: sets aPresent=false and returns null when aIndex
  // is out of range.
  already_AddRefed<SpeechRecognitionAlternative> IndexedGetter(uint32_t aIndex,
                                                               bool& aPresent);

  // Backing storage; public, mutated directly by collaborating code (no
  // setter here).
  nsTArray<RefPtr<SpeechRecognitionAlternative>> mItems;

 private:
  ~SpeechRecognitionResult();

  RefPtr<SpeechRecognition> mParent;
};

}  // namespace mozilla::dom

#endif
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechRecognitionResultList.h" + +#include "mozilla/dom/SpeechRecognitionResultListBinding.h" + +#include "SpeechRecognition.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechRecognitionResultList, mParent, + mItems) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionResultList) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionResultList) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionResultList) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechRecognitionResultList::SpeechRecognitionResultList( + SpeechRecognition* aParent) + : mParent(aParent) {} + +SpeechRecognitionResultList::~SpeechRecognitionResultList() = default; + +nsISupports* SpeechRecognitionResultList::GetParentObject() const { + return static_cast<EventTarget*>(mParent.get()); +} + +JSObject* SpeechRecognitionResultList::WrapObject( + JSContext* aCx, JS::Handle<JSObject*> aGivenProto) { + return SpeechRecognitionResultList_Binding::Wrap(aCx, this, aGivenProto); +} + +already_AddRefed<SpeechRecognitionResult> +SpeechRecognitionResultList::IndexedGetter(uint32_t aIndex, bool& aPresent) { + if (aIndex >= Length()) { + aPresent = false; + return nullptr; + } + + aPresent = true; + return Item(aIndex); +} + +uint32_t SpeechRecognitionResultList::Length() const { return mItems.Length(); } + +already_AddRefed<SpeechRecognitionResult> SpeechRecognitionResultList::Item( + uint32_t aIndex) { + RefPtr<SpeechRecognitionResult> result = mItems.ElementAt(aIndex); + return result.forget(); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/recognition/SpeechRecognitionResultList.h b/dom/media/webspeech/recognition/SpeechRecognitionResultList.h new file mode 100644 index 0000000000..b45659564b --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechRecognitionResultList.h @@ 
-0,0 +1,53 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_SpeechRecognitionResultList_h
#define mozilla_dom_SpeechRecognitionResultList_h

#include "nsCycleCollectionParticipant.h"
#include "nsWrapperCache.h"
#include "nsTArray.h"
#include "js/TypeDecls.h"

#include "mozilla/Attributes.h"

#include "SpeechRecognitionResult.h"

namespace mozilla::dom {

class SpeechRecognition;

// DOM SpeechRecognitionResultList: array-like collection of
// SpeechRecognitionResult objects exposed to script, parented to the
// originating SpeechRecognition.
class SpeechRecognitionResultList final : public nsISupports,
                                          public nsWrapperCache {
 public:
  explicit SpeechRecognitionResultList(SpeechRecognition* aParent);

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechRecognitionResultList)

  nsISupports* GetParentObject() const;

  JSObject* WrapObject(JSContext* aCx,
                       JS::Handle<JSObject*> aGivenProto) override;

  // Number of results.
  uint32_t Length() const;

  // Unchecked access; aIndex must be < Length().
  already_AddRefed<SpeechRecognitionResult> Item(uint32_t aIndex);

  // WebIDL indexed getter: sets aPresent=false and returns null when aIndex
  // is out of range.
  already_AddRefed<SpeechRecognitionResult> IndexedGetter(uint32_t aIndex,
                                                          bool& aPresent);

  // Backing storage; public, filled in directly by collaborating code.
  nsTArray<RefPtr<SpeechRecognitionResult>> mItems;

 private:
  ~SpeechRecognitionResultList();

  RefPtr<SpeechRecognition> mParent;
};

}  // namespace mozilla::dom

#endif
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechTrackListener.h" + +#include "SpeechRecognition.h" +#include "nsProxyRelease.h" + +namespace mozilla::dom { + +SpeechTrackListener::SpeechTrackListener(SpeechRecognition* aRecognition) + : mRecognition(new nsMainThreadPtrHolder<SpeechRecognition>( + "SpeechTrackListener::SpeechTrackListener", aRecognition, false)), + mRemovedPromise( + mRemovedHolder.Ensure("SpeechTrackListener::mRemovedPromise")) { + MOZ_ASSERT(NS_IsMainThread()); + mRemovedPromise->Then(GetCurrentSerialEventTarget(), __func__, + [self = RefPtr<SpeechTrackListener>(this), this] { + mRecognition = nullptr; + }); +} + +void SpeechTrackListener::NotifyQueuedChanges( + MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& aQueuedMedia) { + AudioSegment* audio = const_cast<AudioSegment*>( + static_cast<const AudioSegment*>(&aQueuedMedia)); + + AudioSegment::ChunkIterator iterator(*audio); + while (!iterator.IsEnded()) { + // Skip over-large chunks so we don't crash! 
+ if (iterator->GetDuration() > INT_MAX) { + continue; + } + int duration = int(iterator->GetDuration()); + + if (iterator->IsNull()) { + nsTArray<int16_t> nullData; + PodZero(nullData.AppendElements(duration), duration); + ConvertAndDispatchAudioChunk(duration, iterator->mVolume, + nullData.Elements(), aGraph->GraphRate()); + } else { + AudioSampleFormat format = iterator->mBufferFormat; + + MOZ_ASSERT(format == AUDIO_FORMAT_S16 || format == AUDIO_FORMAT_FLOAT32); + + if (format == AUDIO_FORMAT_S16) { + ConvertAndDispatchAudioChunk( + duration, iterator->mVolume, + static_cast<const int16_t*>(iterator->mChannelData[0]), + aGraph->GraphRate()); + } else if (format == AUDIO_FORMAT_FLOAT32) { + ConvertAndDispatchAudioChunk( + duration, iterator->mVolume, + static_cast<const float*>(iterator->mChannelData[0]), + aGraph->GraphRate()); + } + } + + iterator.Next(); + } +} + +template <typename SampleFormatType> +void SpeechTrackListener::ConvertAndDispatchAudioChunk(int aDuration, + float aVolume, + SampleFormatType* aData, + TrackRate aTrackRate) { + CheckedInt<size_t> bufferSize(sizeof(int16_t)); + bufferSize *= aDuration; + bufferSize *= 1; // channel + RefPtr<SharedBuffer> samples(SharedBuffer::Create(bufferSize)); + + int16_t* to = static_cast<int16_t*>(samples->Data()); + ConvertAudioSamplesWithScale(aData, to, aDuration, aVolume); + + mRecognition->FeedAudioData(mRecognition, samples.forget(), aDuration, this, + aTrackRate); +} + +void SpeechTrackListener::NotifyEnded(MediaTrackGraph* aGraph) { + // TODO dispatch SpeechEnd event so services can be informed +} + +void SpeechTrackListener::NotifyRemoved(MediaTrackGraph* aGraph) { + mRemovedHolder.ResolveIfExists(true, __func__); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/recognition/SpeechTrackListener.h b/dom/media/webspeech/recognition/SpeechTrackListener.h new file mode 100644 index 0000000000..423a5b0317 --- /dev/null +++ b/dom/media/webspeech/recognition/SpeechTrackListener.h @@ -0,0 
+1,50 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_SpeechStreamListener_h
#define mozilla_dom_SpeechStreamListener_h

#include "MediaTrackGraph.h"
#include "MediaTrackListener.h"
#include "AudioSegment.h"
#include "mozilla/MozPromise.h"

namespace mozilla {

class AudioSegment;

namespace dom {

class SpeechRecognition;

// MediaTrackListener that taps the audio track under recognition: receives
// queued audio via NotifyQueuedChanges, converts it to int16 and forwards
// it to the owning SpeechRecognition (see the .cpp).
class SpeechTrackListener : public MediaTrackListener {
 public:
  explicit SpeechTrackListener(SpeechRecognition* aRecognition);
  ~SpeechTrackListener() = default;

  // Converts and dispatches audio newly queued on the track.
  void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
                           const MediaSegment& aQueuedMedia) override;

  // Track ended; currently a no-op (see the TODO in the .cpp).
  void NotifyEnded(MediaTrackGraph* aGraph) override;

  // Listener removed from the track; resolves mRemovedPromise.
  void NotifyRemoved(MediaTrackGraph* aGraph) override;

 private:
  // Converts aDuration frames (scaled by aVolume) into int16 samples and
  // feeds them to mRecognition.
  template <typename SampleFormatType>
  void ConvertAndDispatchAudioChunk(int aDuration, float aVolume,
                                    SampleFormatType* aData,
                                    TrackRate aTrackRate);
  // Main-thread handle to the owner; cleared once mRemovedPromise resolves.
  nsMainThreadPtrHandle<SpeechRecognition> mRecognition;
  MozPromiseHolder<GenericNonExclusivePromise> mRemovedHolder;

 public:
  // Resolved by NotifyRemoved(); lets the owner await detachment from the
  // track.
  const RefPtr<GenericNonExclusivePromise> mRemovedPromise;
};

}  // namespace dom
}  // namespace mozilla

#endif
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "endpointer.h" + +#include "AudioSegment.h" + +namespace { +const int kFrameRate = 200; // 1 frame = 5ms of audio. 
}  // namespace

namespace mozilla {

Endpointer::Endpointer(int sample_rate)
    : speech_input_possibly_complete_silence_length_us_(-1),
      speech_input_complete_silence_length_us_(-1),
      audio_frame_time_us_(0),
      sample_rate_(sample_rate),
      frame_size_(0) {
  Reset();

  // Samples per analysis frame (kFrameRate = 200 -> one frame per 5ms).
  frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));

  // Default timeouts, all overridable through the setters in endpointer.h.
  speech_input_minimum_length_us_ =
      static_cast<int64_t>(1.7 * 1000000);
  speech_input_complete_silence_length_us_ =
      static_cast<int64_t>(0.5 * 1000000);
  long_speech_input_complete_silence_length_us_ = -1;
  long_speech_length_us_ = -1;
  speech_input_possibly_complete_silence_length_us_ =
      1 * 1000000;

  // Set the default configuration for Push To Talk mode.
  EnergyEndpointerParams ep_config;
  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
  ep_config.set_endpoint_margin(0.2f);
  ep_config.set_onset_window(0.15f);
  ep_config.set_speech_on_window(0.4f);
  ep_config.set_offset_window(0.15f);
  ep_config.set_onset_detect_dur(0.09f);
  ep_config.set_onset_confirm_dur(0.075f);
  ep_config.set_on_maintain_dur(0.10f);
  ep_config.set_offset_confirm_dur(0.12f);
  ep_config.set_decision_threshold(1000.0f);
  ep_config.set_min_decision_threshold(50.0f);
  ep_config.set_fast_update_dur(0.2f);
  ep_config.set_sample_rate(static_cast<float>(sample_rate));
  ep_config.set_min_fundamental_frequency(57.143f);
  ep_config.set_max_fundamental_frequency(400.0f);
  ep_config.set_contamination_rejection_period(0.25f);
  energy_endpointer_.Init(ep_config);
}

// Reset internal state. Common to the initial utterance and any following
// utterances within a session.
void Endpointer::Reset() {
  old_ep_status_ = EP_PRE_SPEECH;
  waiting_for_speech_possibly_complete_timeout_ = false;
  waiting_for_speech_complete_timeout_ = false;
  speech_previously_detected_ = false;
  speech_input_complete_ = false;
  audio_frame_time_us_ = 0;  // Reset time for packets sent to endpointer.
  speech_end_time_us_ = -1;
  speech_start_time_us_ = -1;
}

void Endpointer::StartSession() {
  Reset();
  energy_endpointer_.StartSession();
}

void Endpointer::EndSession() {
  energy_endpointer_.EndSession();
}

void Endpointer::SetEnvironmentEstimationMode() {
  Reset();
  energy_endpointer_.SetEnvironmentEstimationMode();
}

void Endpointer::SetUserInputMode() {
  energy_endpointer_.SetUserInputMode();
}

// Delegates to the underlying energy endpointer; *time receives the
// endpointer's current time in microseconds.
EpStatus Endpointer::Status(int64_t *time) {
  return energy_endpointer_.Status(time);
}

// Feeds raw_audio through the energy endpointer frame by frame, updating the
// speech start/end timestamps and the "input complete" timeout bookkeeping.
// Returns the status of the last fully processed frame.
EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
  MOZ_ASSERT(raw_audio.mBufferFormat == AUDIO_FORMAT_S16, "Audio is not in 16 bit format");
  const int16_t* audio_data = static_cast<const int16_t*>(raw_audio.mChannelData[0]);
  const int num_samples = raw_audio.mDuration;
  EpStatus ep_status = EP_PRE_SPEECH;

  // Process the input data in blocks of frame_size_, dropping any incomplete
  // frames at the end (which is ok since typically the caller will be recording
  // audio in multiples of our frame size).
  int sample_index = 0;
  while (sample_index + frame_size_ <= num_samples) {
    // Have the endpointer process the frame.
    energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
                                         audio_data + sample_index,
                                         frame_size_,
                                         rms_out);
    sample_index += frame_size_;
    audio_frame_time_us_ += (frame_size_ * 1000000) /
                            sample_rate_;

    // Get the status of the endpointer.
    int64_t ep_time;
    ep_status = energy_endpointer_.Status(&ep_time);
    // NOTE(review): debug logging straight to stderr on every status change;
    // consider MOZ_LOG before shipping.
    if (old_ep_status_ != ep_status)
      fprintf(stderr, "Status changed old= %d, new= %d\n", old_ep_status_, ep_status);

    // Handle state changes.
    if ((EP_SPEECH_PRESENT == ep_status) &&
        (EP_POSSIBLE_ONSET == old_ep_status_)) {
      speech_end_time_us_ = -1;
      waiting_for_speech_possibly_complete_timeout_ = false;
      waiting_for_speech_complete_timeout_ = false;
      // Trigger SpeechInputDidStart event on first detection.
      if (false == speech_previously_detected_) {
        speech_previously_detected_ = true;
        speech_start_time_us_ = ep_time;
      }
    }
    if ((EP_PRE_SPEECH == ep_status) &&
        (EP_POSSIBLE_OFFSET == old_ep_status_)) {
      speech_end_time_us_ = ep_time;
      waiting_for_speech_possibly_complete_timeout_ = true;
      waiting_for_speech_complete_timeout_ = true;
    }
    if (ep_time > speech_input_minimum_length_us_) {
      // Speech possibly complete timeout.
      if ((waiting_for_speech_possibly_complete_timeout_) &&
          (ep_time - speech_end_time_us_ >
           speech_input_possibly_complete_silence_length_us_)) {
        waiting_for_speech_possibly_complete_timeout_ = false;
      }
      if (waiting_for_speech_complete_timeout_) {
        // The length of the silence timeout period can be held constant, or it
        // can be changed after a fixed amount of time from the beginning of
        // speech.
        bool has_stepped_silence =
            (long_speech_length_us_ > 0) &&
            (long_speech_input_complete_silence_length_us_ > 0);
        int64_t requested_silence_length;
        if (has_stepped_silence &&
            (ep_time - speech_start_time_us_) > long_speech_length_us_) {
          requested_silence_length =
              long_speech_input_complete_silence_length_us_;
        } else {
          requested_silence_length =
              speech_input_complete_silence_length_us_;
        }

        // Speech complete timeout.
        if ((ep_time - speech_end_time_us_) > requested_silence_length) {
          waiting_for_speech_complete_timeout_ = false;
          speech_input_complete_ = true;
        }
      }
    }
    old_ep_status_ = ep_status;
  }
  return ep_status;
}

}  // namespace mozilla
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+
+#include "energy_endpointer.h"
+
+namespace mozilla {
+
+struct AudioChunk;
+
+// A simple interface to the underlying energy-endpointer implementation, this
+// class lets callers provide audio as being recorded and let them poll to find
+// when the user has stopped speaking.
+//
+// There are two events that may trigger the end of speech:
+//
+// speechInputPossiblyComplete event:
+//
+// Signals that silence/noise has been detected for a *short* amount of
+// time after some speech has been detected. It can be used for low latency
+// UI feedback. To disable it, set it to a large amount.
+//
+// speechInputComplete event:
+//
+// This event is intended to signal end of input and to stop recording.
+// The amount of time to wait after speech is set by
+// speech_input_complete_silence_length_ and optionally two other
+// parameters (see below).
+// This time can be held constant, or can change as more speech is detected.
+// In the latter case, the time changes after a set amount of time from the
+// *beginning* of speech. This is motivated by the expectation that there
+// will be two distinct types of inputs: short search queries and longer
+// dictation style input.
+//
+// Three parameters are used to define the piecewise constant timeout function.
+// The timeout length is speech_input_complete_silence_length until
+// long_speech_length, when it changes to
+// long_speech_input_complete_silence_length.
+class Endpointer {
+ public:
+  explicit Endpointer(int sample_rate);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Process a segment of audio, which may be more than one frame.
+  // The status of the last frame will be returned.
+  EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
+
+  // Get the status of the endpointer.
+  EpStatus Status(int64_t *time_us);
+
+  // Get the expected frame size for audio chunks. Audio chunks are expected
+  // to contain a number of samples that is a multiple of this number, and extra
+  // samples will be dropped.
+  int32_t FrameSize() const {
+    return frame_size_;
+  }
+
+  // Returns true if the endpointer detected reasonable audio levels above
+  // background noise which could be user speech, false if not.
+  bool DidStartReceivingSpeech() const {
+    return speech_previously_detected_;
+  }
+
+  // Returns true while the underlying energy endpointer is still in
+  // environment (noise-level) estimation mode.
+  bool IsEstimatingEnvironment() const {
+    return energy_endpointer_.estimating_environment();
+  }
+
+  // All of the setters below take times in microseconds.
+  void set_speech_input_complete_silence_length(int64_t time_us) {
+    speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_input_complete_silence_length(int64_t time_us) {
+    long_speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
+    speech_input_possibly_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_length(int64_t time_us) {
+    long_speech_length_us_ = time_us;
+  }
+
+  // Returns true once the speech-complete silence timeout has elapsed
+  // after detected speech (the speechInputComplete event described above).
+  bool speech_input_complete() const {
+    return speech_input_complete_;
+  }
+
+  // RMS background noise level in dB.
+  float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
+
+ private:
+  // Reset internal states. Helper method common to initial input utterance
+  // and following input utterances.
+  void Reset();
+
+  // Minimum allowable length of speech input.
+  int64_t speech_input_minimum_length_us_;
+
+  // The speechInputPossiblyComplete event signals that silence/noise has been
+  // detected for a *short* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64_t speech_input_possibly_complete_silence_length_us_;
+
+  // The speechInputComplete event signals that silence/noise has been
+  // detected for a *long* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64_t speech_input_complete_silence_length_us_;
+
+  // Same as above, this specifies the required silence period after speech
+  // detection. This period is used instead of
+  // speech_input_complete_silence_length_ when the utterance is longer than
+  // long_speech_length_. This parameter is optional.
+  int64_t long_speech_input_complete_silence_length_us_;
+
+  // The period of time after which the endpointer should consider
+  // long_speech_input_complete_silence_length_ as a valid silence period
+  // instead of speech_input_complete_silence_length_. This parameter is
+  // optional.
+  int64_t long_speech_length_us_;
+
+  // First speech onset time, used in determination of speech complete timeout.
+  int64_t speech_start_time_us_;
+
+  // Most recent end time, used in determination of speech complete timeout.
+  int64_t speech_end_time_us_;
+
+  int64_t audio_frame_time_us_;
+  EpStatus old_ep_status_;
+  bool waiting_for_speech_possibly_complete_timeout_;
+  bool waiting_for_speech_complete_timeout_;
+  bool speech_previously_detected_;
+  bool speech_input_complete_;
+  EnergyEndpointer energy_endpointer_;
+  int sample_rate_;
+  int32_t frame_size_;
+};
+
+} // namespace mozilla
+
+#endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
diff --git a/dom/media/webspeech/recognition/energy_endpointer.cc b/dom/media/webspeech/recognition/energy_endpointer.cc
new file mode 100644
index 0000000000..b1c1ee0bcf
--- /dev/null
+++ b/dom/media/webspeech/recognition/energy_endpointer.cc
@@ -0,0 +1,393 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "energy_endpointer.h"
+
+#include <math.h>
+
+namespace {
+
+// Returns the RMS (quadratic mean) of the input signal.
+// Note: the mean (DC component) is subtracted out before taking the square
+// root, i.e. this computes sqrt(E[x^2] - E[x]^2) -- the standard deviation
+// of the samples -- rather than the raw quadratic mean.
+// NOTE(review): divides by num_samples; assumes num_samples > 0 -- confirm
+// callers never pass an empty frame.
+float RMS(const int16_t* samples, int num_samples) {
+  int64_t ssq_int64_t = 0;
+  int64_t sum_int64_t = 0;
+  for (int i = 0; i < num_samples; ++i) {
+    sum_int64_t += samples[i];
+    ssq_int64_t += samples[i] * samples[i];
+  }
+  // now convert to floats.
+  double sum = static_cast<double>(sum_int64_t);
+  sum /= num_samples;
+  double ssq = static_cast<double>(ssq_int64_t);
+  return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum)));
+}
+
+// Converts a duration in seconds to microseconds, rounding to the nearest
+// whole microsecond.
+int64_t Secs2Usecs(float seconds) {
+  return static_cast<int64_t>(0.5 + (1.0e6 * seconds));
+}
+
+// Converts a linear amplitude to decibels (20*log10). Values at or below
+// 1e-100 are clamped to -2000 dB to avoid returning -infinity.
+float GetDecibel(float value) {
+  if (value > 1.0e-100)
+    return 20 * log10(value);
+  return -2000.0;
+}
+
+} // namespace
+
+namespace mozilla {
+
+// Stores threshold-crossing histories for making decisions about the speech
+// state.
+class EnergyEndpointer::HistoryRing {
+ public:
+  HistoryRing() : insertion_index_(0) {}
+
+  // Resets the ring to |size| elements each with state |initial_state|
+  void SetRing(int size, bool initial_state);
+
+  // Inserts a new entry into the ring and drops the oldest entry.
+  void Insert(int64_t time_us, bool decision);
+
+  // Returns the time in microseconds of the most recently added entry.
+  int64_t EndTime() const;
+
+  // Returns the sum of all intervals during which 'decision' is true within
+  // the time in seconds specified by 'duration'. The returned interval is
+  // in seconds.
+  float RingSum(float duration_sec);
+
+ private:
+  // One threshold-crossing observation: the frame timestamp and whether the
+  // frame was judged super-threshold.
+  struct DecisionPoint {
+    int64_t time_us;
+    bool decision;
+  };
+
+  std::vector<DecisionPoint> decision_points_;
+  int insertion_index_; // Index at which the next item gets added/inserted.
+
+  // Copy and assignment are disallowed (declared but never defined).
+  HistoryRing(const HistoryRing&);
+  void operator=(const HistoryRing&);
+};
+
+// Unfilled slots are initialized with time -1 so they are older than any
+// real timestamp.
+void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
+  insertion_index_ = 0;
+  decision_points_.clear();
+  DecisionPoint init = { -1, initial_state };
+  decision_points_.resize(size, init);
+}
+
+// Overwrites the oldest slot and advances the insertion cursor, wrapping via
+// modulo. NOTE(review): assumes SetRing() was called with size > 0; the
+// modulo would divide by zero on an empty ring -- confirm callers.
+void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
+  decision_points_[insertion_index_].time_us = time_us;
+  decision_points_[insertion_index_].decision = decision;
+  insertion_index_ = (insertion_index_ + 1) % decision_points_.size();
+}
+
+// The newest entry sits one slot behind the insertion cursor (wrapping to the
+// last slot when the cursor is at 0).
+int64_t EnergyEndpointer::HistoryRing::EndTime() const {
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  return decision_points_[ind].time_us;
+}
+
+// Walks backwards from the newest entry, accumulating the length of the
+// intervals whose starting entry had decision == true, until either
+// |duration_sec| has been covered or the whole ring has been visited.
+float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
+  if (decision_points_.empty())
+    return 0.0;
+
+  int64_t sum_us = 0;
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  int64_t end_us = decision_points_[ind].time_us;
+  bool is_on = decision_points_[ind].decision;
+  int64_t start_us = end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
+  if (start_us < 0)
+    start_us = 0;
+  size_t n_summed = 1; // n points ==> (n-1) intervals
+  while ((decision_points_[ind].time_us > start_us) &&
+         (n_summed < decision_points_.size())) {
+    --ind;
+    if (ind < 0)
+      ind = decision_points_.size() - 1;
+    if (is_on)
+      sum_us += end_us - decision_points_[ind].time_us;
+    is_on = decision_points_[ind].decision;
+    end_us = decision_points_[ind].time_us;
+    n_summed++;
+  }
+
+  return 1.0e-6f * sum_us; // Returns total time that was super threshold.
+}
+
+// Members start zeroed/pre-speech; per the header contract, Init() must be
+// called before any other use of the instance.
+EnergyEndpointer::EnergyEndpointer()
+    : status_(EP_PRE_SPEECH),
+      offset_confirm_dur_sec_(0),
+      endpointer_time_us_(0),
+      fast_update_frames_(0),
+      frame_counter_(0),
+      max_window_dur_(4.0),
+      sample_rate_(0),
+      history_(new HistoryRing()),
+      decision_threshold_(0),
+      estimating_environment_(false),
+      noise_level_(0),
+      rms_adapt_(0),
+      start_lag_(0),
+      end_lag_(0),
+      user_input_start_time_us_(0) {
+}
+
+EnergyEndpointer::~EnergyEndpointer() {
+}
+
+// Converts a duration in seconds to a whole number of frames, rounding to
+// the nearest frame.
+int EnergyEndpointer::TimeToFrame(float time) const {
+  return static_cast<int32_t>(0.5 + (time / params_.frame_period()));
+}
+
+void EnergyEndpointer::Restart(bool reset_threshold) {
+  status_ = EP_PRE_SPEECH;
+  user_input_start_time_us_ = 0;
+
+  if (reset_threshold) {
+    decision_threshold_ = params_.decision_threshold();
+    rms_adapt_ = decision_threshold_;
+    noise_level_ = params_.decision_threshold() / 2.0f;
+    frame_counter_ = 0; // Used for rapid initial update of levels.
+  }
+
+  // Set up the memories to hold the history windows.
+  history_->SetRing(TimeToFrame(max_window_dur_), false);
+
+  // Flag that indicates that current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressed the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+}
+
+void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
+  params_ = params;
+
+  // Find the longest history interval to be used, and make the ring
+  // large enough to accommodate that number of frames. NOTE: This
+  // depends upon ep_frame_period being set correctly in the factory
+  // that did this instantiation.
+  max_window_dur_ = params_.onset_window();
+  if (params_.speech_on_window() > max_window_dur_)
+    max_window_dur_ = params_.speech_on_window();
+  if (params_.offset_window() > max_window_dur_)
+    max_window_dur_ = params_.offset_window();
+  Restart(true);
+
+  // Sub-threshold residency (in seconds) allowed inside the offset window
+  // before the offset is confirmed; clamped at zero.
+  offset_confirm_dur_sec_ = params_.offset_window() -
+      params_.offset_confirm_dur();
+  if (offset_confirm_dur_sec_ < 0.0)
+    offset_confirm_dur_sec_ = 0.0;
+
+  user_input_start_time_us_ = 0;
+
+  // Flag that indicates that current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressed the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+  // The initial value of the noise and speech levels is inconsequential.
+  // The level of the first frame will overwrite these values.
+  noise_level_ = params_.decision_threshold() / 2.0f;
+  fast_update_frames_ =
+      static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
+
+  frame_counter_ = 0; // Used for rapid initial update of levels.
+
+  // Lag bounds: samples per period at the highest / lowest expected
+  // fundamental frequency (see the autocorrelation notes in
+  // energy_endpointer_params.h).
+  sample_rate_ = params_.sample_rate();
+  start_lag_ = static_cast<int>(sample_rate_ /
+                                params_.max_fundamental_frequency());
+  end_lag_ = static_cast<int>(sample_rate_ /
+                              params_.min_fundamental_frequency());
+}
+
+void EnergyEndpointer::StartSession() {
+  Restart(true);
+}
+
+void EnergyEndpointer::EndSession() {
+  status_ = EP_POST_SPEECH;
+}
+
+void EnergyEndpointer::SetEnvironmentEstimationMode() {
+  Restart(true);
+  estimating_environment_ = true;
+}
+
+void EnergyEndpointer::SetUserInputMode() {
+  estimating_environment_ = false;
+  user_input_start_time_us_ = endpointer_time_us_;
+}
+
+void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
+                                         const int16_t* samples,
+                                         int num_samples,
+                                         float* rms_out) {
+  endpointer_time_us_ = time_us;
+  float rms = RMS(samples, num_samples);
+
+  // Check that this is user input audio vs. pre-input adaptation audio.
+  // Input audio starts when the user indicates start of input, by e.g.
+  // pressing push-to-talk. Audio received prior to that is used to update
+  // noise and speech level estimates.
+  if (!estimating_environment_) {
+    // Decide whether this frame is super-threshold; frames inside the
+    // contamination-rejection period after the start of user input are
+    // forced sub-threshold to suppress audio-feedback false accepts.
+    bool decision = false;
+    if ((endpointer_time_us_ - user_input_start_time_us_) <
+        Secs2Usecs(params_.contamination_rejection_period())) {
+      decision = false;
+      //PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, ("decision: forced to false, time: %d", endpointer_time_us_));
+    } else {
+      decision = (rms > decision_threshold_);
+    }
+
+    history_->Insert(endpointer_time_us_, decision);
+
+    // Advance the onset/offset state machine based on the recent
+    // threshold-crossing history.
+    switch (status_) {
+      case EP_PRE_SPEECH:
+        if (history_->RingSum(params_.onset_window()) >
+            params_.onset_detect_dur()) {
+          status_ = EP_POSSIBLE_ONSET;
+        }
+        break;
+
+      case EP_POSSIBLE_ONSET: {
+        float tsum = history_->RingSum(params_.onset_window());
+        if (tsum > params_.onset_confirm_dur()) {
+          status_ = EP_SPEECH_PRESENT;
+        } else { // If signal is not maintained, drop back to pre-speech.
+          if (tsum <= params_.onset_detect_dur())
+            status_ = EP_PRE_SPEECH;
+        }
+        break;
+      }
+
+      case EP_SPEECH_PRESENT: {
+        // To induce hysteresis in the state residency, we allow a
+        // smaller residency time in the on_ring, than was required to
+        // enter the SPEECH_PRESENT state.
+        float on_time = history_->RingSum(params_.speech_on_window());
+        if (on_time < params_.on_maintain_dur())
+          status_ = EP_POSSIBLE_OFFSET;
+        break;
+      }
+
+      case EP_POSSIBLE_OFFSET:
+        if (history_->RingSum(params_.offset_window()) <=
+            offset_confirm_dur_sec_) {
+          // Note that this offset time may be beyond the end
+          // of the input buffer in a real-time system. It will be up
+          // to the RecognizerSession to decide what to do.
+          status_ = EP_PRE_SPEECH; // Automatically reset for next utterance.
+        } else { // If speech picks up again we allow return to SPEECH_PRESENT.
+          if (history_->RingSum(params_.speech_on_window()) >=
+              params_.on_maintain_dur())
+            status_ = EP_SPEECH_PRESENT;
+        }
+        break;
+
+      default:
+        break;
+    }
+
+    // If this is a quiet, non-speech region, slowly adapt the detection
+    // threshold to be about 6dB above the average RMS.
+    if ((!decision) && (status_ == EP_PRE_SPEECH)) {
+      decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
+      rms_adapt_ = decision_threshold_;
+    } else {
+      // If this is in a speech region, adapt the decision threshold to
+      // be about 10dB below the average RMS. If the noise level is high,
+      // the threshold is pushed up.
+      // Adaptation up to a higher level is 5 times faster than decay to
+      // a lower level.
+      if ((status_ == EP_SPEECH_PRESENT) && decision) {
+        if (rms_adapt_ > rms) {
+          rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
+        } else {
+          rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
+        }
+        float target_threshold = 0.3f * rms_adapt_ + noise_level_;
+        decision_threshold_ = (.90f * decision_threshold_) +
+                              (0.10f * target_threshold);
+      }
+    }
+
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+
+  // Update speech and noise levels.
+  UpdateLevels(rms);
+  ++frame_counter_;
+
+  // Optionally report this frame's level in decibels.
+  if (rms_out)
+    *rms_out = GetDecibel(rms);
+}
+
+float EnergyEndpointer::GetNoiseLevelDb() const {
+  return GetDecibel(noise_level_);
+}
+
+void EnergyEndpointer::UpdateLevels(float rms) {
+  // Update quickly initially. We assume this is noise and that
+  // speech is 6dB above the noise.
+  if (frame_counter_ < fast_update_frames_) {
+    // Alpha increases from 0 to (k-1)/k where k is the number of time
+    // steps in the initial adaptation period.
+    float alpha = static_cast<float>(frame_counter_) /
+        static_cast<float>(fast_update_frames_);
+    noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
+    //PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, ("FAST UPDATE, frame_counter_ %d, fast_update_frames_ %d", frame_counter_, fast_update_frames_));
+  } else {
+    // Update Noise level. The noise level adapts quickly downward, but
+    // slowly upward. The noise_level_ parameter is not currently used
+    // for threshold adaptation. It is used for UI feedback.
+    if (noise_level_ < rms)
+      noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
+    else
+      noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
+  }
+  if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
+    decision_threshold_ = noise_level_ * 2; // 6dB above noise level.
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+}
+
+// Reports the current state and the timestamp (us) of the newest entry in
+// the decision history.
+EpStatus EnergyEndpointer::Status(int64_t* status_time) const {
+  *status_time = history_->EndTime();
+  return status_;
+}
+
+} // namespace mozilla
diff --git a/dom/media/webspeech/recognition/energy_endpointer.h b/dom/media/webspeech/recognition/energy_endpointer.h
new file mode 100644
index 0000000000..12d3c736e3
--- /dev/null
+++ b/dom/media/webspeech/recognition/energy_endpointer.h
@@ -0,0 +1,180 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The EnergyEndpointer class finds likely speech onset and offset points.
+//
+// The implementation described here is about the simplest possible.
+// It is based on timings of threshold crossings for overall signal
+// RMS. It is suitable for light weight applications.
+//
+// As written, the basic idea is that one specifies intervals that
+// must be occupied by super- and sub-threshold energy levels, and
+// defers decisions re onset and offset times until these
+// specifications have been met. Three basic intervals are tested: an
+// onset window, a speech-on window, and an offset window. We require
+// super-threshold to exceed some minimum total durations in the onset
+// and speech-on windows before declaring the speech onset time, and
+// we specify a required sub-threshold residency in the offset window
+// before declaring speech offset. As the various residency requirements are
+// met, the EnergyEndpointer instance assumes various states, and can return the
+// ID of these states to the client (see EpStatus below).
+//
+// The levels of the speech and background noise are continuously updated. It is
+// important that the background noise level be estimated initially for
+// robustness in noisy conditions. The first frames are assumed to be background
+// noise and a fast update rate is used for the noise level. The duration for
+// fast update is controlled by the fast_update_dur_ parameter.
+//
+// If used in noisy conditions, the endpointer should be started and run in the
+// EnvironmentEstimation mode, for at least 200ms, before switching to
+// UserInputMode.
+// Audio feedback contamination can appear in the input audio, if not cut
+// out or handled by echo cancellation. Audio feedback can trigger a false
+// accept. The false accepts can be ignored by setting
+// ep_contamination_rejection_period.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+
+#include <vector>
+
+#include "mozilla/UniquePtr.h"
+
+#include "energy_endpointer_params.h"
+
+namespace mozilla {
+
+// Endpointer status codes
+enum EpStatus {
+  EP_PRE_SPEECH = 10,
+  EP_POSSIBLE_ONSET,
+  EP_SPEECH_PRESENT,
+  EP_POSSIBLE_OFFSET,
+  EP_POST_SPEECH,
+};
+
+class EnergyEndpointer {
+ public:
+  // The default construction MUST be followed by Init(), before any
+  // other use can be made of the instance.
+  EnergyEndpointer();
+  virtual ~EnergyEndpointer();
+
+  void Init(const EnergyEndpointerParams& params);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Computes the next input frame and modifies EnergyEndpointer status as
+  // appropriate based on the computation.
+  void ProcessAudioFrame(int64_t time_us,
+                         const int16_t* samples, int num_samples,
+                         float* rms_out);
+
+  // Returns the current state of the EnergyEndpointer and the time
+  // corresponding to the most recently computed frame.
+  EpStatus Status(int64_t* status_time_us) const;
+
+  bool estimating_environment() const {
+    return estimating_environment_;
+  }
+
+  // Returns estimated noise level in dB.
+  float GetNoiseLevelDb() const;
+
+ private:
+  class HistoryRing;
+
+  // Resets the endpointer internal state. If reset_threshold is true, the
+  // state will be reset completely, including adaptive thresholds and the
+  // removal of all history information.
+  void Restart(bool reset_threshold);
+
+  // Update internal speech and noise levels.
+  void UpdateLevels(float rms);
+
+  // Returns the number of frames (or frame number) corresponding to
+  // the 'time' (in seconds).
+  int TimeToFrame(float time) const;
+
+  EpStatus status_; // The current state of this instance.
+  float offset_confirm_dur_sec_; // max on time allowed to confirm POST_SPEECH
+  int64_t endpointer_time_us_; // Time of the most recently received audio frame.
+  int64_t fast_update_frames_; // Number of frames for initial level adaptation.
+  int64_t frame_counter_; // Number of frames seen. Used for initial adaptation.
+  float max_window_dur_; // Largest search window size (seconds)
+  float sample_rate_; // Sampling rate.
+
+  // Ring buffers to hold the speech activity history.
+  UniquePtr<HistoryRing> history_;
+
+  // Configuration parameters.
+  EnergyEndpointerParams params_;
+
+  // RMS which must be exceeded to conclude frame is speech.
+  float decision_threshold_;
+
+  // Flag to indicate that audio should be used to estimate environment, prior
+  // to receiving user input.
+  bool estimating_environment_;
+
+  // Estimate of the background noise level. Used externally for UI feedback.
+  float noise_level_;
+
+  // An adaptive threshold used to update decision_threshold_ when appropriate.
+  float rms_adapt_;
+
+  // Start lag corresponds to the highest fundamental frequency.
+  int start_lag_;
+
+  // End lag corresponds to the lowest fundamental frequency.
+  int end_lag_;
+
+  // Time when mode switched from environment estimation to user input. This
+  // is used to time forced rejection of audio feedback contamination.
+  int64_t user_input_start_time_us_;
+
+  // prevent copy constructor and assignment
+  EnergyEndpointer(const EnergyEndpointer&);
+  void operator=(const EnergyEndpointer&);
+};
+
+} // namespace mozilla
+
+#endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
diff --git a/dom/media/webspeech/recognition/energy_endpointer_params.cc b/dom/media/webspeech/recognition/energy_endpointer_params.cc
new file mode 100644
index 0000000000..cac4f1b238
--- /dev/null
+++ b/dom/media/webspeech/recognition/energy_endpointer_params.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "energy_endpointer_params.h"
+
+namespace mozilla {
+
+// Construction simply applies the defaults; callers adjust individual
+// fields through the setters afterwards.
+EnergyEndpointerParams::EnergyEndpointerParams() {
+  SetDefaults();
+}
+
+// Durations and windows below are in seconds; thresholds are linear RMS
+// amplitudes (see energy_endpointer_params.h for per-field meaning).
+void EnergyEndpointerParams::SetDefaults() {
+  frame_period_ = 0.01f;
+  frame_duration_ = 0.01f;
+  endpoint_margin_ = 0.2f;
+  onset_window_ = 0.15f;
+  speech_on_window_ = 0.4f;
+  offset_window_ = 0.15f;
+  onset_detect_dur_ = 0.09f;
+  onset_confirm_dur_ = 0.075f;
+  on_maintain_dur_ = 0.10f;
+  offset_confirm_dur_ = 0.12f;
+  decision_threshold_ = 150.0f;
+  min_decision_threshold_ = 50.0f;
+  fast_update_dur_ = 0.2f;
+  sample_rate_ = 8000.0f;
+  min_fundamental_frequency_ = 57.143f;
+  max_fundamental_frequency_ = 400.0f;
+  contamination_rejection_period_ = 0.25f;
+}
+
+// Explicit field-by-field copy; kept void-returning to match the
+// declaration in the header.
+void EnergyEndpointerParams::operator=(const EnergyEndpointerParams& source) {
+  frame_period_ = source.frame_period();
+  frame_duration_ = source.frame_duration();
+  endpoint_margin_ = source.endpoint_margin();
+  onset_window_ = source.onset_window();
+  speech_on_window_ = source.speech_on_window();
+  offset_window_ = source.offset_window();
+  onset_detect_dur_ = source.onset_detect_dur();
+  onset_confirm_dur_ = source.onset_confirm_dur();
+  on_maintain_dur_ = source.on_maintain_dur();
+  offset_confirm_dur_ = source.offset_confirm_dur();
+  decision_threshold_ = source.decision_threshold();
+  min_decision_threshold_ = source.min_decision_threshold();
+  fast_update_dur_ = source.fast_update_dur();
+  sample_rate_ = source.sample_rate();
+  min_fundamental_frequency_ = source.min_fundamental_frequency();
+  max_fundamental_frequency_ = source.max_fundamental_frequency();
+  contamination_rejection_period_ = source.contamination_rejection_period();
+}
+
+} // namespace mozilla
diff --git a/dom/media/webspeech/recognition/energy_endpointer_params.h b/dom/media/webspeech/recognition/energy_endpointer_params.h
new file mode 100644
index 0000000000..6437c6dc0f
--- /dev/null
+++ b/dom/media/webspeech/recognition/energy_endpointer_params.h
@@ -0,0 +1,159 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+
+namespace mozilla {
+
+// Input parameters for the EnergyEndpointer class.
+class EnergyEndpointerParams {
+ public:
+  EnergyEndpointerParams();
+
+  void SetDefaults();
+
+  void operator=(const EnergyEndpointerParams& source);
+
+  // Accessors and mutators
+  float frame_period() const { return frame_period_; }
+  void set_frame_period(float frame_period) {
+    frame_period_ = frame_period;
+  }
+
+  float frame_duration() const { return frame_duration_; }
+  void set_frame_duration(float frame_duration) {
+    frame_duration_ = frame_duration;
+  }
+
+  float endpoint_margin() const { return endpoint_margin_; }
+  void set_endpoint_margin(float endpoint_margin) {
+    endpoint_margin_ = endpoint_margin;
+  }
+
+  float onset_window() const { return onset_window_; }
+  void set_onset_window(float onset_window) { onset_window_ = onset_window; }
+
+  float speech_on_window() const { return speech_on_window_; }
+  void set_speech_on_window(float speech_on_window) {
+    speech_on_window_ = speech_on_window;
+  }
+
+  float offset_window() const { return offset_window_; }
+  void set_offset_window(float offset_window) {
+    offset_window_ = offset_window;
+  }
+
+  float onset_detect_dur() const { return onset_detect_dur_; }
+  void set_onset_detect_dur(float onset_detect_dur) {
+    onset_detect_dur_ = onset_detect_dur;
+  }
+
+ float onset_confirm_dur() const { return onset_confirm_dur_; } + void set_onset_confirm_dur(float onset_confirm_dur) { + onset_confirm_dur_ = onset_confirm_dur; + } + + float on_maintain_dur() const { return on_maintain_dur_; } + void set_on_maintain_dur(float on_maintain_dur) { + on_maintain_dur_ = on_maintain_dur; + } + + float offset_confirm_dur() const { return offset_confirm_dur_; } + void set_offset_confirm_dur(float offset_confirm_dur) { + offset_confirm_dur_ = offset_confirm_dur; + } + + float decision_threshold() const { return decision_threshold_; } + void set_decision_threshold(float decision_threshold) { + decision_threshold_ = decision_threshold; + } + + float min_decision_threshold() const { return min_decision_threshold_; } + void set_min_decision_threshold(float min_decision_threshold) { + min_decision_threshold_ = min_decision_threshold; + } + + float fast_update_dur() const { return fast_update_dur_; } + void set_fast_update_dur(float fast_update_dur) { + fast_update_dur_ = fast_update_dur; + } + + float sample_rate() const { return sample_rate_; } + void set_sample_rate(float sample_rate) { sample_rate_ = sample_rate; } + + float min_fundamental_frequency() const { return min_fundamental_frequency_; } + void set_min_fundamental_frequency(float min_fundamental_frequency) { + min_fundamental_frequency_ = min_fundamental_frequency; + } + + float max_fundamental_frequency() const { return max_fundamental_frequency_; } + void set_max_fundamental_frequency(float max_fundamental_frequency) { + max_fundamental_frequency_ = max_fundamental_frequency; + } + + float contamination_rejection_period() const { + return contamination_rejection_period_; + } + void set_contamination_rejection_period( + float contamination_rejection_period) { + contamination_rejection_period_ = contamination_rejection_period; + } + + private: + float frame_period_; // Frame period + float frame_duration_; // Window size + float onset_window_; // Interval scanned for onset 
activity + float speech_on_window_;  // Interval scanned for ongoing speech + float offset_window_;  // Interval scanned for offset evidence + float offset_confirm_dur_;  // Silence duration required to confirm offset + float decision_threshold_;  // Initial rms detection threshold + float min_decision_threshold_;  // Minimum rms detection threshold + float fast_update_dur_;  // Period for initial estimation of levels. + float sample_rate_;  // Expected sample rate. + + // Time to add on either side of endpoint threshold crossings + float endpoint_margin_; + // Total dur within onset_window required to enter ONSET state + float onset_detect_dur_; + // Total on time within onset_window required to enter SPEECH_ON state + float onset_confirm_dur_; + // Minimum dur in SPEECH_ON state required to maintain ON state + float on_maintain_dur_; + // Minimum fundamental frequency for autocorrelation. + float min_fundamental_frequency_; + // Maximum fundamental frequency for autocorrelation. + float max_fundamental_frequency_; + // Period after start of user input that above threshold values are ignored. + // This is to reject audio feedback contamination. + float contamination_rejection_period_; +}; + +} // namespace mozilla + +#endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_ diff --git a/dom/media/webspeech/recognition/moz.build b/dom/media/webspeech/recognition/moz.build new file mode 100644 index 0000000000..5fdf8fdd47 --- /dev/null +++ b/dom/media/webspeech/recognition/moz.build @@ -0,0 +1,64 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +MOCHITEST_MANIFESTS += ["test/mochitest.ini"] + +XPIDL_MODULE = "dom_webspeechrecognition" + +XPIDL_SOURCES = ["nsISpeechRecognitionService.idl"] + +EXPORTS.mozilla.dom += [ + "OnlineSpeechRecognitionService.h", + "SpeechGrammar.h", + "SpeechGrammarList.h", + "SpeechRecognition.h", + "SpeechRecognitionAlternative.h", + "SpeechRecognitionResult.h", + "SpeechRecognitionResultList.h", + "SpeechTrackListener.h", +] + +EXPORTS += [ + "endpointer.h", + "energy_endpointer.h", + "energy_endpointer_params.h", +] + +if CONFIG["MOZ_WEBSPEECH_TEST_BACKEND"]: + EXPORTS.mozilla.dom += [ + "test/FakeSpeechRecognitionService.h", + ] + +UNIFIED_SOURCES += [ + "endpointer.cc", + "energy_endpointer.cc", + "energy_endpointer_params.cc", + "OnlineSpeechRecognitionService.cpp", + "SpeechGrammar.cpp", + "SpeechGrammarList.cpp", + "SpeechRecognition.cpp", + "SpeechRecognitionAlternative.cpp", + "SpeechRecognitionResult.cpp", + "SpeechRecognitionResultList.cpp", + "SpeechTrackListener.cpp", +] + +if CONFIG["MOZ_WEBSPEECH_TEST_BACKEND"]: + UNIFIED_SOURCES += [ + "test/FakeSpeechRecognitionService.cpp", + ] + +USE_LIBS += [ + "jsoncpp", +] + +LOCAL_INCLUDES += [ + "/dom/base", + "/toolkit/components/jsoncpp/include", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" diff --git a/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl b/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl new file mode 100644 index 0000000000..a43d277da0 --- /dev/null +++ b/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl @@ -0,0 +1,43 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +%{C++ +#include "mozilla/WeakPtr.h" + +namespace mozilla { +class AudioSegment; +namespace dom { +class SpeechRecognition; +class SpeechRecognitionResultList; +class SpeechGrammarList; +class SpeechGrammar; +} +} +%} + +native SpeechRecognitionWeakPtr(mozilla::WeakPtr<mozilla::dom::SpeechRecognition>); +[ptr] native AudioSegmentPtr(mozilla::AudioSegment); +[ptr] native SpeechGrammarPtr(mozilla::dom::SpeechGrammar); +[ptr] native SpeechGrammarListPtr(mozilla::dom::SpeechGrammarList); + +[uuid(6fcb6ee8-a6db-49ba-9f06-355d7ee18ea7)] +interface nsISpeechGrammarCompilationCallback : nsISupports { + void grammarCompilationEnd(in SpeechGrammarPtr grammarObject, in boolean success); +}; + +[uuid(8e97f287-f322-44e8-8888-8344fa408ef8)] +interface nsISpeechRecognitionService : nsISupports { + void initialize(in SpeechRecognitionWeakPtr aSpeechRecognition); + void processAudioSegment(in AudioSegmentPtr aAudioSegment, in long aSampleRate); + void validateAndSetGrammarList(in SpeechGrammarPtr aSpeechGrammar, in nsISpeechGrammarCompilationCallback aCallback); + void soundEnd(); + void abort(); +}; + +%{C++ +#define NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "@mozilla.org/webspeech/service;1?name=" +%} diff --git a/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp new file mode 100644 index 0000000000..cf14cb3750 --- /dev/null +++ b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsThreadUtils.h" + +#include "FakeSpeechRecognitionService.h" + +#include "SpeechRecognition.h" +#include "SpeechRecognitionAlternative.h" +#include "SpeechRecognitionResult.h" +#include "SpeechRecognitionResultList.h" +#include "nsIObserverService.h" +#include "mozilla/Services.h" +#include "mozilla/StaticPrefs_media.h" + +namespace mozilla { + +using namespace dom; + +NS_IMPL_ISUPPORTS(FakeSpeechRecognitionService, nsISpeechRecognitionService, + nsIObserver) + +FakeSpeechRecognitionService::FakeSpeechRecognitionService() = default; + +FakeSpeechRecognitionService::~FakeSpeechRecognitionService() = default; + +NS_IMETHODIMP +FakeSpeechRecognitionService::Initialize( + WeakPtr<SpeechRecognition> aSpeechRecognition) { + MOZ_ASSERT(NS_IsMainThread()); + mRecognition = aSpeechRecognition; + nsCOMPtr<nsIObserverService> obs = services::GetObserverService(); + obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false); + obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false); + return NS_OK; +} + +NS_IMETHODIMP +FakeSpeechRecognitionService::ProcessAudioSegment(AudioSegment* aAudioSegment, + int32_t aSampleRate) { + MOZ_ASSERT(!NS_IsMainThread()); + return NS_OK; +} + +NS_IMETHODIMP +FakeSpeechRecognitionService::SoundEnd() { + MOZ_ASSERT(NS_IsMainThread()); + return NS_OK; +} + +NS_IMETHODIMP +FakeSpeechRecognitionService::ValidateAndSetGrammarList( + mozilla::dom::SpeechGrammar*, nsISpeechGrammarCompilationCallback*) { + return NS_OK; +} + +NS_IMETHODIMP +FakeSpeechRecognitionService::Abort() { + MOZ_ASSERT(NS_IsMainThread()); + return NS_OK; +} + +NS_IMETHODIMP +FakeSpeechRecognitionService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + MOZ_ASSERT(StaticPrefs::media_webspeech_test_fake_recognition_service(), + "Got request to fake recognition service event, but " + "media.webspeech.test.fake_recognition_service is not set"); + + if (!strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC)) { + 
nsCOMPtr<nsIObserverService> obs = services::GetObserverService(); + obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC); + obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC); + + return NS_OK; + } + + const nsDependentString eventName = nsDependentString(aData); + + if (eventName.EqualsLiteral("EVENT_RECOGNITIONSERVICE_ERROR")) { + mRecognition->DispatchError( + SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, + SpeechRecognitionErrorCode::Network, // TODO different codes? + "RECOGNITIONSERVICE_ERROR test event"); + + } else if (eventName.EqualsLiteral("EVENT_RECOGNITIONSERVICE_FINAL_RESULT")) { + RefPtr<SpeechEvent> event = new SpeechEvent( + mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT); + + event->mRecognitionResultList = BuildMockResultList(); + NS_DispatchToMainThread(event); + } + return NS_OK; +} + +SpeechRecognitionResultList* +FakeSpeechRecognitionService::BuildMockResultList() { + SpeechRecognitionResultList* resultList = + new SpeechRecognitionResultList(mRecognition); + SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition); + if (0 < mRecognition->MaxAlternatives()) { + SpeechRecognitionAlternative* alternative = + new SpeechRecognitionAlternative(mRecognition); + + alternative->mTranscript = u"Mock final result"_ns; + alternative->mConfidence = 0.0f; + + result->mItems.AppendElement(alternative); + } + resultList->mItems.AppendElement(result); + + return resultList; +} + +} // namespace mozilla diff --git a/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.h b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.h new file mode 100644 index 0000000000..69e2786b76 --- /dev/null +++ b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.h @@ -0,0 +1,40 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the 
Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_FakeSpeechRecognitionService_h +#define mozilla_dom_FakeSpeechRecognitionService_h + +#include "nsCOMPtr.h" +#include "nsIObserver.h" +#include "nsISpeechRecognitionService.h" + +#define NS_FAKE_SPEECH_RECOGNITION_SERVICE_CID \ + {0x48c345e7, \ + 0x9929, \ + 0x4f9a, \ + {0xa5, 0x63, 0xf4, 0x78, 0x22, 0x2d, 0xab, 0xcd}}; + +namespace mozilla { + +class FakeSpeechRecognitionService : public nsISpeechRecognitionService, + public nsIObserver { + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSISPEECHRECOGNITIONSERVICE + NS_DECL_NSIOBSERVER + + FakeSpeechRecognitionService(); + + private: + virtual ~FakeSpeechRecognitionService(); + + WeakPtr<dom::SpeechRecognition> mRecognition; + dom::SpeechRecognitionResultList* BuildMockResultList(); +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/recognition/test/head.js b/dom/media/webspeech/recognition/test/head.js new file mode 100644 index 0000000000..c77a7ee926 --- /dev/null +++ b/dom/media/webspeech/recognition/test/head.js @@ -0,0 +1,200 @@ +"use strict"; + +const DEFAULT_AUDIO_SAMPLE_FILE = "hello.ogg"; +const SPEECH_RECOGNITION_TEST_REQUEST_EVENT_TOPIC = + "SpeechRecognitionTest:RequestEvent"; +const SPEECH_RECOGNITION_TEST_END_TOPIC = "SpeechRecognitionTest:End"; + +var errorCodes = { + NO_SPEECH: "no-speech", + ABORTED: "aborted", + AUDIO_CAPTURE: "audio-capture", + NETWORK: "network", + NOT_ALLOWED: "not-allowed", + SERVICE_NOT_ALLOWED: "service-not-allowed", + BAD_GRAMMAR: "bad-grammar", + LANGUAGE_NOT_SUPPORTED: "language-not-supported", +}; + +var Services = SpecialPowers.Services; + +function EventManager(sr) { + var self = this; + var nEventsExpected = 0; + self.eventsReceived = []; + + var allEvents = [ + "audiostart", + "soundstart", + "speechstart", + "speechend", + "soundend", + "audioend", + "result", + 
"nomatch", + "error", + "start", + "end", + ]; + + var eventDependencies = { + speechend: "speechstart", + soundend: "soundstart", + audioend: "audiostart", + }; + + var isDone = false; + + // set up grammar + var sgl = new SpeechGrammarList(); + sgl.addFromString("#JSGF V1.0; grammar test; public <simple> = hello ;", 1); + sr.grammars = sgl; + + // AUDIO_DATA events are asynchronous, + // so we queue events requested while they are being + // issued to make them seem synchronous + var isSendingAudioData = false; + var queuedEventRequests = []; + + // register default handlers + for (var i = 0; i < allEvents.length; i++) { + (function (eventName) { + sr["on" + eventName] = function (evt) { + var message = "unexpected event: " + eventName; + if (eventName == "error") { + message += " -- " + evt.message; + } + + ok(false, message); + if (self.doneFunc && !isDone) { + isDone = true; + self.doneFunc(); + } + }; + })(allEvents[i]); + } + + self.expect = function EventManager_expect(eventName, cb) { + nEventsExpected++; + + sr["on" + eventName] = function (evt) { + self.eventsReceived.push(eventName); + ok(true, "received event " + eventName); + + var dep = eventDependencies[eventName]; + if (dep) { + ok( + self.eventsReceived.includes(dep), + eventName + " must come after " + dep + ); + } + + cb && cb(evt, sr); + if ( + self.doneFunc && + !isDone && + nEventsExpected === self.eventsReceived.length + ) { + isDone = true; + self.doneFunc(); + } + }; + }; + + self.start = function EventManager_start() { + isSendingAudioData = true; + var audioTag = document.createElement("audio"); + audioTag.src = self.audioSampleFile; + + var stream = audioTag.mozCaptureStreamUntilEnded(); + audioTag.addEventListener("ended", function () { + info("Sample stream ended, requesting queued events"); + isSendingAudioData = false; + while (queuedEventRequests.length) { + self.requestFSMEvent(queuedEventRequests.shift()); + } + }); + + audioTag.play(); + sr.start(stream); + }; + + 
self.requestFSMEvent = function EventManager_requestFSMEvent(eventName) { + if (isSendingAudioData) { + info( + "Queuing event " + eventName + " until we're done sending audio data" + ); + queuedEventRequests.push(eventName); + return; + } + + info("requesting " + eventName); + Services.obs.notifyObservers( + null, + SPEECH_RECOGNITION_TEST_REQUEST_EVENT_TOPIC, + eventName + ); + }; + + self.requestTestEnd = function EventManager_requestTestEnd() { + Services.obs.notifyObservers(null, SPEECH_RECOGNITION_TEST_END_TOPIC); + }; +} + +function buildResultCallback(transcript) { + return function (evt) { + is(evt.results[0][0].transcript, transcript, "expect correct transcript"); + }; +} + +function buildErrorCallback(errcode) { + return function (err) { + is(err.error, errcode, "expect correct error code"); + }; +} + +function performTest(options) { + var prefs = options.prefs; + + prefs.unshift( + ["media.webspeech.recognition.enable", true], + ["media.webspeech.test.enable", true] + ); + + SpecialPowers.pushPrefEnv({ set: prefs }, function () { + var sr; + if (!options.webkit) { + sr = new SpeechRecognition(); + } else { + sr = new webkitSpeechRecognition(); + var grammar = new webkitSpeechGrammar(); + var speechrecognitionlist = new webkitSpeechGrammarList(); + speechrecognitionlist.addFromString("", 1); + sr.grammars = speechrecognitionlist; + } + var em = new EventManager(sr); + + for (var eventName in options.expectedEvents) { + var cb = options.expectedEvents[eventName]; + em.expect(eventName, cb); + } + + em.doneFunc = function () { + em.requestTestEnd(); + if (options.doneFunc) { + options.doneFunc(); + } + }; + + em.audioSampleFile = DEFAULT_AUDIO_SAMPLE_FILE; + if (options.audioSampleFile) { + em.audioSampleFile = options.audioSampleFile; + } + + em.start(); + + for (var i = 0; i < options.eventsToRequest.length; i++) { + em.requestFSMEvent(options.eventsToRequest[i]); + } + }); +} diff --git a/dom/media/webspeech/recognition/test/hello.ogg 
b/dom/media/webspeech/recognition/test/hello.ogg Binary files differnew file mode 100644 index 0000000000..7a80926065 --- /dev/null +++ b/dom/media/webspeech/recognition/test/hello.ogg diff --git a/dom/media/webspeech/recognition/test/hello.ogg^headers^ b/dom/media/webspeech/recognition/test/hello.ogg^headers^ new file mode 100644 index 0000000000..4030ea1d3d --- /dev/null +++ b/dom/media/webspeech/recognition/test/hello.ogg^headers^ @@ -0,0 +1 @@ +Cache-Control: no-store diff --git a/dom/media/webspeech/recognition/test/http_requesthandler.sjs b/dom/media/webspeech/recognition/test/http_requesthandler.sjs new file mode 100644 index 0000000000..3400df50ec --- /dev/null +++ b/dom/media/webspeech/recognition/test/http_requesthandler.sjs @@ -0,0 +1,85 @@ +const CC = Components.Constructor; + +// Context structure - we need to set this up properly to pass to setObjectState +const ctx = { + QueryInterface(iid) { + if (iid.equals(Components.interfaces.nsISupports)) { + return this; + } + throw Components.Exception("", Components.results.NS_ERROR_NO_INTERFACE); + }, +}; + +function setRequest(request) { + setObjectState(key, request); +} +function getRequest() { + let request; + getObjectState(v => { + request = v; + }); + return request; +} + +function handleRequest(request, response) { + response.processAsync(); + if (request.queryString == "save") { + // Get the context structure and finish the old request + getObjectState("context", function (obj) { + savedCtx = obj.wrappedJSObject; + request = savedCtx.request; + + response.setHeader("Content-Type", "application/octet-stream", false); + response.setHeader("Access-Control-Allow-Origin", "*", false); + response.setHeader("Cache-Control", "no-cache", false); + response.setStatusLine(request.httpVersion, 200, "OK"); + + const input = request.bodyInputStream; + const output = response.bodyOutputStream; + let bodyAvail; + while ((bodyAvail = input.available()) > 0) { + output.writeFrom(input, bodyAvail); + } + 
response.finish(); + }); + return; + } else if ( + request.queryString == "malformedresult=1" || + request.queryString == "emptyresult=1" + ) { + jsonOK = + request.queryString == "malformedresult=1" + ? '{"status":"ok","dat' + : '{"status":"ok","data":[]}'; + response.setHeader("Content-Length", String(jsonOK.length), false); + response.setHeader("Content-Type", "application/json", false); + response.setHeader("Access-Control-Allow-Origin", "*", false); + response.setHeader("Cache-Control", "no-cache", false); + response.setStatusLine(request.httpVersion, 200, "OK"); + response.write(jsonOK, jsonOK.length); + response.finish(); + } else if (request.queryString == "hangup=1") { + response.finish(); + } else if (request.queryString == "return400=1") { + jsonOK = "{'message':'Bad header:accept-language-stt'}"; + response.setHeader("Content-Length", String(jsonOK.length), false); + response.setHeader("Content-Type", "application/json", false); + response.setHeader("Access-Control-Allow-Origin", "*", false); + response.setHeader("Cache-Control", "no-cache", false); + response.setStatusLine(request.httpVersion, 400, "Bad Request"); + response.write(jsonOK, jsonOK.length); + response.finish(); + } else { + ctx.wrappedJSObject = ctx; + ctx.request = request; + setObjectState("context", ctx); + jsonOK = '{"status":"ok","data":[{"confidence":0.9085610,"text":"hello"}]}'; + response.setHeader("Content-Length", String(jsonOK.length), false); + response.setHeader("Content-Type", "application/json", false); + response.setHeader("Access-Control-Allow-Origin", "*", false); + response.setHeader("Cache-Control", "no-cache", false); + response.setStatusLine(request.httpVersion, 200, "OK"); + response.write(jsonOK, jsonOK.length); + response.finish(); + } +} diff --git a/dom/media/webspeech/recognition/test/mochitest.ini b/dom/media/webspeech/recognition/test/mochitest.ini new file mode 100644 index 0000000000..6af13b906c --- /dev/null +++ 
b/dom/media/webspeech/recognition/test/mochitest.ini @@ -0,0 +1,35 @@ +[DEFAULT] +tags=mtg +subsuite = media +support-files = + head.js + hello.ogg + hello.ogg^headers^ + http_requesthandler.sjs + sinoid+hello.ogg + sinoid+hello.ogg^headers^ + silence.ogg + silence.ogg^headers^ +[test_abort.html] +skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538363 +[test_audio_capture_error.html] +skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538360 +[test_call_start_from_end_handler.html] +tags=capturestream +skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538363 +[test_nested_eventloop.html] +skip-if = toolkit == 'android' +[test_online_400_response.html] +[test_online_hangup.html] +[test_online_http.html] +[test_online_http_webkit.html] +[test_online_malformed_result_handling.html] +[test_online_empty_result_handling.html] +[test_preference_enable.html] +[test_recognition_service_error.html] +skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538360 +[test_success_without_recognition_service.html] +skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538360 +[test_timeout.html] +skip-if = + os == "linux" # Bug 1307991 - low frequency on try pushes diff --git a/dom/media/webspeech/recognition/test/silence.ogg b/dom/media/webspeech/recognition/test/silence.ogg Binary files differnew file mode 100644 index 0000000000..e6da3a5022 --- /dev/null +++ b/dom/media/webspeech/recognition/test/silence.ogg diff --git a/dom/media/webspeech/recognition/test/silence.ogg^headers^ b/dom/media/webspeech/recognition/test/silence.ogg^headers^ new file mode 100644 index 0000000000..4030ea1d3d --- /dev/null +++ b/dom/media/webspeech/recognition/test/silence.ogg^headers^ @@ -0,0 +1 @@ +Cache-Control: no-store diff --git a/dom/media/webspeech/recognition/test/sinoid+hello.ogg b/dom/media/webspeech/recognition/test/sinoid+hello.ogg Binary files differnew file mode 100644 index 0000000000..7092e82f30 --- 
/dev/null +++ b/dom/media/webspeech/recognition/test/sinoid+hello.ogg diff --git a/dom/media/webspeech/recognition/test/sinoid+hello.ogg^headers^ b/dom/media/webspeech/recognition/test/sinoid+hello.ogg^headers^ new file mode 100644 index 0000000000..4030ea1d3d --- /dev/null +++ b/dom/media/webspeech/recognition/test/sinoid+hello.ogg^headers^ @@ -0,0 +1 @@ +Cache-Control: no-store diff --git a/dom/media/webspeech/recognition/test/test_abort.html b/dom/media/webspeech/recognition/test/test_abort.html new file mode 100644 index 0000000000..0f22770cc7 --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_abort.html @@ -0,0 +1,73 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Call abort from inside handlers</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + // Abort inside event handlers, shouldn't get a + // result after that + + var nextEventIdx = 0; + var eventsToAbortOn = [ + "start", + "audiostart", + "speechstart", + "speechend", + "audioend" + ]; + + function doNextTest() { + var nextEvent = eventsToAbortOn[nextEventIdx]; + var expectedEvents = { + "start": null, + "audiostart": null, + "audioend": null, + "end": null + }; + + if (nextEventIdx >= eventsToAbortOn.indexOf("speechstart")) { + expectedEvents.speechstart = null; + } + + if (nextEventIdx >= eventsToAbortOn.indexOf("speechend")) { + expectedEvents.speechend = null; + } + + info("Aborting on " + nextEvent); + expectedEvents[nextEvent] = function(evt, sr) { 
+ sr.abort(); + }; + + nextEventIdx++; + + performTest({ + eventsToRequest: [], + expectedEvents, + doneFunc: (nextEventIdx < eventsToAbortOn.length) ? doNextTest : SimpleTest.finish, + prefs: [["media.webspeech.test.fake_fsm_events", true], + ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 100000]] + }); + } + + doNextTest(); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_audio_capture_error.html b/dom/media/webspeech/recognition/test/test_audio_capture_error.html new file mode 100644 index 0000000000..0c054dbf0b --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_audio_capture_error.html @@ -0,0 +1,42 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Behavior on audio error</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: ['EVENT_AUDIO_ERROR'], + expectedEvents: { + 'start': null, + 'audiostart': null, + 'speechstart': null, + 'speechend': null, + 'audioend': null, + 'error': buildErrorCallback(errorCodes.AUDIO_CAPTURE), + 'end': null + }, + doneFunc: SimpleTest.finish, + prefs: [["media.webspeech.test.fake_fsm_events", true], + ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 100000]] + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_call_start_from_end_handler.html 
b/dom/media/webspeech/recognition/test/test_call_start_from_end_handler.html new file mode 100644 index 0000000000..895648ad9e --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_call_start_from_end_handler.html @@ -0,0 +1,102 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Restart recognition from end handler</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + function createAudioStream() { + var audioTag = document.createElement("audio"); + audioTag.src = DEFAULT_AUDIO_SAMPLE_FILE; + + var stream = audioTag.mozCaptureStreamUntilEnded(); + audioTag.play(); + + return stream; + } + + var done = false; + function endHandler(evt, sr) { + if (done) { + SimpleTest.finish(); + return; + } + + try { + var stream = createAudioStream(); + sr.start(stream); // shouldn't fail + } catch (err) { + ok(false, "Failed to start() from end() callback"); + } + + // calling start() may cause some callbacks to fire, but we're + // no longer interested in them, except for onend, which is where + // we'll conclude the test. + sr.onstart = null; + sr.onaudiostart = null; + sr.onspeechstart = null; + sr.onspeechend = null; + sr.onaudioend = null; + sr.onresult = null; + + // FIXME(ggp) the state transition caused by start() is async, + // but abort() is sync (see bug 1055093). 
until we normalize + // state transitions, we need to setTimeout here to make sure + // abort() finds the speech recognition object in the correct + // state (namely, STATE_STARTING). + setTimeout(function() { + sr.abort(); + done = true; + }); + + info("Successfully start() from end() callback"); + } + + function expectExceptionHandler(evt, sr) { + try { + sr.start(createAudioStream()); + } catch (err) { + is(err.name, "InvalidStateError"); + return; + } + + ok(false, "Calling start() didn't raise InvalidStateError"); + } + + performTest({ + eventsToRequest: [ + 'EVENT_RECOGNITIONSERVICE_FINAL_RESULT' + ], + expectedEvents: { + 'start': expectExceptionHandler, + 'audiostart': expectExceptionHandler, + 'speechstart': expectExceptionHandler, + 'speechend': expectExceptionHandler, + 'audioend': expectExceptionHandler, + 'result': buildResultCallback("Mock final result"), + 'end': endHandler, + }, + prefs: [["media.webspeech.test.fake_fsm_events", true], + ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_nested_eventloop.html b/dom/media/webspeech/recognition/test/test_nested_eventloop.html new file mode 100644 index 0000000000..4924766b44 --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_nested_eventloop.html @@ -0,0 +1,82 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Spin the event loop from inside a callback</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: 
none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + /* + * SpecialPowers.spinEventLoop can be used to spin the event loop, causing + * queued SpeechEvents (such as those created by calls to start(), stop() + * or abort()) to be processed immediately. + * When this is done from inside DOM event handlers, it is possible to + * cause reentrancy in our C++ code, which we should be able to withstand. + */ + function abortAndSpinEventLoop(evt, sr) { + sr.abort(); + SpecialPowers.spinEventLoop(window); + } + function doneFunc() { + // Trigger gc now and wait some time to make sure this test gets the blame + // for any assertions caused by spinning the event loop. + // + // NB - The assertions should be gone, but this looks too scary to touch + // during batch cleanup. + var count = 0, GC_COUNT = 4; + + function triggerGCOrFinish() { + SpecialPowers.gc(); + count++; + + if (count == GC_COUNT) { + SimpleTest.finish(); + } + } + + for (var i = 0; i < GC_COUNT; i++) { + setTimeout(triggerGCOrFinish, 0); + } + } + + /* + * We start by performing a normal start, then abort from the audiostart + * callback and force the EVENT_ABORT to be processed while still inside + * the event handler. This causes the recording to stop, which raises + * the audioend and (later on) end events. + * Then, we abort (once again spinning the event loop) from the audioend + * handler, attempting to cause a re-entry into the abort code. This second + * call should be ignored, and we get the end callback and finish. 
+ */ + + performTest({ + eventsToRequest: [], + expectedEvents: { + "audiostart": abortAndSpinEventLoop, + "audioend": abortAndSpinEventLoop, + "end": null + }, + doneFunc, + prefs: [["media.webspeech.test.fake_fsm_events", true], + ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_online_400_response.html b/dom/media/webspeech/recognition/test/test_online_400_response.html new file mode 100644 index 0000000000..1a7d0ed452 --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_online_400_response.html @@ -0,0 +1,47 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1248897 +The intent of this file is to test the speech recognition service behavior +whenever the server returns a 400 error +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1248897 -- Online speech service</title> + <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [], + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "end": null, + 'error': buildErrorCallback(errorCodes.NETWORK), + "speechstart": null, + "speechend": null + }, + doneFunc: SimpleTest.finish, + prefs: [["media.webspeech.recognition.enable", true], + ["media.webspeech.recognition.force_enable", true], + ["media.webspeech.service.endpoint", + 
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?return400=1"], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_online_empty_result_handling.html b/dom/media/webspeech/recognition/test/test_online_empty_result_handling.html new file mode 100644 index 0000000000..46f1e7e0cb --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_online_empty_result_handling.html @@ -0,0 +1,48 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1248897 +The intent of this file is to test the speech recognition service behavior +whenever the server returns a valid json object, but without any transcription +results on it, for example: `{"status":"ok","data":[]}` +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1248897 -- Online speech service</title> + <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [], + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "end": null, + 'error': buildErrorCallback(errorCodes.NETWORK), + "speechstart": null, + "speechend": null + }, + doneFunc: SimpleTest.finish, + prefs: [["media.webspeech.recognition.enable", true], + ["media.webspeech.recognition.force_enable", true], + ["media.webspeech.service.endpoint", + "http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?emptyresult=1"], + 
["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_online_hangup.html b/dom/media/webspeech/recognition/test/test_online_hangup.html new file mode 100644 index 0000000000..4a46f80f8f --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_online_hangup.html @@ -0,0 +1,47 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1248897 +The intent of this file is to test the speech recognition service behavior +whenever the server hangups the connection without sending any response +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1248897 -- Online speech service</title> + <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [], + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "end": null, + 'error': buildErrorCallback(errorCodes.NETWORK), + "speechstart": null, + "speechend": null + }, + doneFunc: SimpleTest.finish, + prefs: [["media.webspeech.recognition.enable", true], + ["media.webspeech.recognition.force_enable", true], + ["media.webspeech.service.endpoint", + "http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?hangup=1"], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_online_http.html b/dom/media/webspeech/recognition/test/test_online_http.html new file mode 100644 
index 0000000000..43be7a656a --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_online_http.html @@ -0,0 +1,89 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1248897 +The intent of this file is to test a successfull speech recognition request and +that audio is being properly encoded +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1248897 -- Online speech service</title> + <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + async function validateRawAudio(buffer) { + const ac = new AudioContext(); + const decodedData = await ac.decodeAudioData(buffer); + const source = ac.createBufferSource(); + source.buffer = decodedData; + source.loop = true; + const analyser = ac.createAnalyser(); + analyser.smoothingTimeConstant = 0.2; + analyser.fftSize = 1024; + source.connect(analyser); + const binIndexForFrequency = frequency => + 1 + Math.round(frequency * analyser.fftSize / ac.sampleRate); + source.start(); + const data = new Uint8Array(analyser.frequencyBinCount); + const start = performance.now(); + while (true) { + if (performance.now() - start > 10000) { + return false; + break; + } + analyser.getByteFrequencyData(data); + if (data[binIndexForFrequency(200)] < 50 && + data[binIndexForFrequency(440)] > 180 && + data[binIndexForFrequency(1000)] < 50) { + return true; + break; + } + await new Promise(r => requestAnimationFrame(r)); + } + } + + async function verifyEncodedAudio(requestUrl) { + try { + const response = await fetch(requestUrl); + const buffer = 
await response.arrayBuffer(); + ok(await validateRawAudio(buffer), "Audio encoding is valid"); + } catch(e) { + ok(false, e); + } finally { + SimpleTest.finish(); + } + } + + performTest({ + eventsToRequest: {}, + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "end": null, + "result": () => verifyEncodedAudio("http_requesthandler.sjs?save"), + "speechstart": null, + "speechend": null + }, + audioSampleFile: "sinoid+hello.ogg", + prefs: [["media.webspeech.recognition.enable", true], + ["media.webspeech.recognition.force_enable", true], + ["media.webspeech.service.endpoint", + "http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs"], + ["media.webspeech.recognition.timeout", 100000]] + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_online_http_webkit.html b/dom/media/webspeech/recognition/test/test_online_http_webkit.html new file mode 100644 index 0000000000..7f6c7e6d7d --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_online_http_webkit.html @@ -0,0 +1,90 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1248897 +The intent of this file is to test a successfull speech recognition request and +that audio is being properly encoded +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1248897 -- Online speech service</title> + <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + async function validateRawAudio(buffer) { + const ac = new AudioContext(); + 
const decodedData = await ac.decodeAudioData(buffer); + const source = ac.createBufferSource(); + source.buffer = decodedData; + source.loop = true; + const analyser = ac.createAnalyser(); + analyser.smoothingTimeConstant = 0.2; + analyser.fftSize = 1024; + source.connect(analyser); + const binIndexForFrequency = frequency => + 1 + Math.round(frequency * analyser.fftSize / ac.sampleRate); + source.start(); + const data = new Uint8Array(analyser.frequencyBinCount); + const start = performance.now(); + while (true) { + if (performance.now() - start > 10000) { + return false; + break; + } + analyser.getByteFrequencyData(data); + if (data[binIndexForFrequency(200)] < 50 && + data[binIndexForFrequency(440)] > 180 && + data[binIndexForFrequency(1000)] < 50) { + return true; + break; + } + await new Promise(r => requestAnimationFrame(r)); + } + } + + async function verifyEncodedAudio(requestUrl) { + try { + const response = await fetch(requestUrl); + const buffer = await response.arrayBuffer(); + ok(await validateRawAudio(buffer), "Audio encoding is valid"); + } catch(e) { + ok(false, e); + } finally { + SimpleTest.finish(); + } + } + + performTest({ + eventsToRequest: {}, + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "end": null, + "result": () => verifyEncodedAudio("http_requesthandler.sjs?save"), + "speechstart": null, + "speechend": null + }, + audioSampleFile: "sinoid+hello.ogg", + prefs: [["media.webspeech.recognition.enable", true], + ["media.webspeech.recognition.force_enable", true], + ["media.webspeech.service.endpoint", + "http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs"], + ["media.webspeech.recognition.timeout", 100000]], + webkit: true + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_online_malformed_result_handling.html b/dom/media/webspeech/recognition/test/test_online_malformed_result_handling.html new file mode 100644 index 
0000000000..b071a46ea3 --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_online_malformed_result_handling.html @@ -0,0 +1,48 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1248897 +The intent of this file is to test the speech recognition service behavior +whenever the server returns an invalid/corrupted json object, for example: +`{"status":"ok","dat` +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1248897 -- Online speech service</title> + <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [], + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "end": null, + 'error': buildErrorCallback(errorCodes.NETWORK), + "speechstart": null, + "speechend": null + }, + doneFunc: SimpleTest.finish, + prefs: [["media.webspeech.recognition.enable", true], + ["media.webspeech.recognition.force_enable", true], + ["media.webspeech.service.endpoint", + "http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?malformedresult=1"], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_preference_enable.html b/dom/media/webspeech/recognition/test/test_preference_enable.html new file mode 100644 index 0000000000..2b56f82e2c --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_preference_enable.html @@ -0,0 +1,43 @@ +<!DOCTYPE HTML> +<html> +<!-- 
+https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- No objects should be visible with preference disabled</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + SpecialPowers.pushPrefEnv({ + set: [["media.webspeech.recognition.enable", false]] + }, function() { + var objects = [ + "SpeechRecognition", + "SpeechGrammar", + "SpeechRecognitionResult", + "SpeechRecognitionResultList", + "SpeechRecognitionAlternative" + ]; + + for (var i = 0; i < objects.length; i++) { + is(window[objects[i]], undefined, + objects[i] + " should be undefined with pref off"); + } + + SimpleTest.finish(); + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_recognition_service_error.html b/dom/media/webspeech/recognition/test/test_recognition_service_error.html new file mode 100644 index 0000000000..e8e59e2afc --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_recognition_service_error.html @@ -0,0 +1,45 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Behavior on recognition service error</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre 
id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [ + 'EVENT_RECOGNITIONSERVICE_ERROR' + ], + expectedEvents: { + 'start': null, + 'audiostart': null, + 'speechstart': null, + 'speechend': null, + 'audioend': null, + 'error': buildErrorCallback(errorCodes.NETWORK), + 'end': null + }, + doneFunc: SimpleTest.finish, + prefs: [["media.webspeech.test.fake_fsm_events", true], + ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_success_without_recognition_service.html b/dom/media/webspeech/recognition/test/test_success_without_recognition_service.html new file mode 100644 index 0000000000..38748ed5cb --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_success_without_recognition_service.html @@ -0,0 +1,45 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Success with fake recognition service</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [ + 'EVENT_RECOGNITIONSERVICE_FINAL_RESULT' + ], + expectedEvents: { + 'start': null, + 'audiostart': null, + 'speechstart': null, + 'speechend': null, + 'audioend': null, + 'result': buildResultCallback("Mock final result"), + 'end': null + }, + doneFunc:SimpleTest.finish, + prefs: [["media.webspeech.test.fake_fsm_events", true], 
+ ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 100000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/recognition/test/test_timeout.html b/dom/media/webspeech/recognition/test/test_timeout.html new file mode 100644 index 0000000000..8334c9e779 --- /dev/null +++ b/dom/media/webspeech/recognition/test/test_timeout.html @@ -0,0 +1,42 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295 -- Timeout for user speech</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> + <script type="application/javascript" src="head.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="text/javascript"> + SimpleTest.waitForExplicitFinish(); + + performTest({ + eventsToRequest: [], + expectedEvents: { + "start": null, + "audiostart": null, + "audioend": null, + "error": buildErrorCallback(errorCodes.NO_SPEECH), + "end": null + }, + doneFunc: SimpleTest.finish, + audioSampleFile: "silence.ogg", + prefs: [["media.webspeech.test.fake_fsm_events", true], + ["media.webspeech.test.fake_recognition_service", true], + ["media.webspeech.recognition.timeout", 1000]] + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp new file mode 100644 index 0000000000..20e3ef754b --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp @@ -0,0 +1,315 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * 
License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupportsPrimitives.h" +#include "nsSpeechTask.h" +#include "mozilla/Logging.h" + +#include "mozilla/dom/Element.h" + +#include "mozilla/dom/SpeechSynthesisBinding.h" +#include "mozilla/dom/WindowGlobalChild.h" +#include "SpeechSynthesis.h" +#include "nsContentUtils.h" +#include "nsSynthVoiceRegistry.h" +#include "mozilla/dom/Document.h" +#include "nsIDocShell.h" + +#undef LOG +mozilla::LogModule* GetSpeechSynthLog() { + static mozilla::LazyLogModule sLog("SpeechSynthesis"); + + return sLog; +} +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis) + +NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue) + tmp->mVoiceCache.Clear(); + NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE +NS_IMPL_CYCLE_COLLECTION_UNLINK_END + +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue) + for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) { + cb.NoteXPCOMChild(voice); + } +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) +NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) + +NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper) +NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper) + +SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent) + : DOMEventTargetHelper(aParent), + mHoldQueue(false), + mInnerID(aParent->WindowID()) { + MOZ_ASSERT(NS_IsMainThread()); + + nsCOMPtr<nsIObserverService> obs = 
mozilla::services::GetObserverService(); + if (obs) { + obs->AddObserver(this, "inner-window-destroyed", true); + obs->AddObserver(this, "synth-voices-changed", true); + } +} + +SpeechSynthesis::~SpeechSynthesis() = default; + +JSObject* SpeechSynthesis::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto); +} + +bool SpeechSynthesis::Pending() const { + // If we don't have any task, nothing is pending. If we have only one task, + // check if that task is currently pending. If we have more than one task, + // then the tasks after the first one are definitely pending. + return mSpeechQueue.Length() > 1 || + (mSpeechQueue.Length() == 1 && + (!mCurrentTask || mCurrentTask->IsPending())); +} + +bool SpeechSynthesis::Speaking() const { + // Check global speaking state if there is no active speaking task. + return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) || + nsSynthVoiceRegistry::GetInstance()->IsSpeaking(); +} + +bool SpeechSynthesis::Paused() const { + return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) || + (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused()); +} + +bool SpeechSynthesis::HasEmptyQueue() const { + return mSpeechQueue.Length() == 0; +} + +bool SpeechSynthesis::HasVoices() const { + uint32_t voiceCount = mVoiceCache.Count(); + if (voiceCount == 0) { + nsresult rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); + if (NS_WARN_IF(NS_FAILED(rv))) { + return false; + } + } + + return voiceCount != 0; +} + +void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) { + if (!mInnerID) { + return; + } + + mSpeechQueue.AppendElement(&aUtterance); + + if (mSpeechQueue.Length() == 1) { + RefPtr<WindowGlobalChild> wgc = + WindowGlobalChild::GetByInnerWindowId(mInnerID); + if (wgc) { + wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); + } + + // If we only have one item in the queue, we aren't pre-paused, and + // 
we have voices available, speak it. + if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } + } +} + +void SpeechSynthesis::AdvanceQueue() { + LOG(LogLevel::Debug, + ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length())); + + if (mSpeechQueue.IsEmpty()) { + return; + } + + RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0); + + nsAutoString docLang; + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + if (Document* doc = window ? window->GetExtantDoc() : nullptr) { + if (Element* elm = doc->GetHtmlElement()) { + elm->GetLang(docLang); + } + } + + mCurrentTask = + nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang); + + if (mCurrentTask) { + mCurrentTask->SetSpeechSynthesis(this); + } +} + +void SpeechSynthesis::Cancel() { + if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { + // Remove all queued utterances except for current one, we will remove it + // in OnEnd + mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1); + } else { + mSpeechQueue.Clear(); + } + + if (mCurrentTask) { + mCurrentTask->Cancel(); + } +} + +void SpeechSynthesis::Pause() { + if (Paused()) { + return; + } + + if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { + mCurrentTask->Pause(); + } else { + mHoldQueue = true; + } +} + +void SpeechSynthesis::Resume() { + if (!Paused()) { + return; + } + + mHoldQueue = false; + + if (mCurrentTask) { + mCurrentTask->Resume(); + } else { + AdvanceQueue(); + } +} + +void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) { + MOZ_ASSERT(mCurrentTask == aTask); + + if (!mSpeechQueue.IsEmpty()) { + mSpeechQueue.RemoveElementAt(0); + if (mSpeechQueue.IsEmpty()) { + RefPtr<WindowGlobalChild> wgc = + WindowGlobalChild::GetByInnerWindowId(mInnerID); + if (wgc) { + wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); + } + } + } + + mCurrentTask = nullptr; + AdvanceQueue(); +} + +void SpeechSynthesis::GetVoices( + nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) { + 
aResult.Clear(); + uint32_t voiceCount = 0; + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr; + + if (nsContentUtils::ShouldResistFingerprinting(docShell, + RFPTarget::SpeechSynthesis)) { + return; + } + + nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this); + + for (uint32_t i = 0; i < voiceCount; i++) { + nsAutoString uri; + rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri); + + if (NS_FAILED(rv)) { + NS_WARNING("Failed to retrieve voice from registry"); + continue; + } + + SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri); + + if (!voice) { + voice = new SpeechSynthesisVoice(voiceParent, uri); + } + + aResult.AppendElement(voice); + } + + mVoiceCache.Clear(); + + for (uint32_t i = 0; i < aResult.Length(); i++) { + SpeechSynthesisVoice* voice = aResult[i]; + mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice}); + } +} + +// For testing purposes, allows us to cancel the current task that is +// misbehaving, and flush the queue. 
+void SpeechSynthesis::ForceEnd() { + if (mCurrentTask) { + mCurrentTask->ForceEnd(); + } +} + +NS_IMETHODIMP +SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + MOZ_ASSERT(NS_IsMainThread()); + + if (strcmp(aTopic, "inner-window-destroyed") == 0) { + nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject); + NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE); + + uint64_t innerID; + nsresult rv = wrapper->GetData(&innerID); + NS_ENSURE_SUCCESS(rv, rv); + + if (innerID == mInnerID) { + mInnerID = 0; + Cancel(); + + nsCOMPtr<nsIObserverService> obs = + mozilla::services::GetObserverService(); + if (obs) { + obs->RemoveObserver(this, "inner-window-destroyed"); + } + } + } else if (strcmp(aTopic, "synth-voices-changed") == 0) { + LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged")); + nsCOMPtr<nsPIDOMWindowInner> window = GetOwner(); + nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr; + + if (!nsContentUtils::ShouldResistFingerprinting( + docShell, RFPTarget::SpeechSynthesis)) { + DispatchTrustedEvent(u"voiceschanged"_ns); + // If we have a pending item, and voices become available, speak it. + if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } + } + } + + return NS_OK; +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/SpeechSynthesis.h b/dom/media/webspeech/synth/SpeechSynthesis.h new file mode 100644 index 0000000000..1227261b59 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesis.h @@ -0,0 +1,88 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesis_h +#define mozilla_dom_SpeechSynthesis_h + +#include "nsCOMPtr.h" +#include "nsIObserver.h" +#include "nsRefPtrHashtable.h" +#include "nsString.h" +#include "nsWeakReference.h" +#include "nsWrapperCache.h" +#include "js/TypeDecls.h" + +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" + +class nsIDOMWindow; + +namespace mozilla::dom { + +class nsSpeechTask; + +class SpeechSynthesis final : public DOMEventTargetHelper, + public nsIObserver, + public nsSupportsWeakReference { + public: + explicit SpeechSynthesis(nsPIDOMWindowInner* aParent); + + NS_DECL_ISUPPORTS_INHERITED + NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechSynthesis, + DOMEventTargetHelper) + NS_DECL_NSIOBSERVER + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + bool Pending() const; + + bool Speaking() const; + + bool Paused() const; + + bool HasEmptyQueue() const; + + void Speak(SpeechSynthesisUtterance& aUtterance); + + void Cancel(); + + void Pause(); + + void Resume(); + + void OnEnd(const nsSpeechTask* aTask); + + void GetVoices(nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult); + + void ForceEnd(); + + IMPL_EVENT_HANDLER(voiceschanged) + + private: + virtual ~SpeechSynthesis(); + + void AdvanceQueue(); + + bool HasVoices() const; + + bool HasSpeakingTask() const { + return mCurrentTask && mCurrentTask->IsSpeaking(); + } + + nsTArray<RefPtr<SpeechSynthesisUtterance> > mSpeechQueue; + + RefPtr<nsSpeechTask> mCurrentTask; + + nsRefPtrHashtable<nsStringHashKey, SpeechSynthesisVoice> mVoiceCache; + + bool mHoldQueue; + + uint64_t mInnerID; +}; + +} // namespace mozilla::dom +#endif diff --git a/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp b/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp new file mode 100644 index 0000000000..4d8dcd5c12 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisUtterance.cpp @@ -0,0 +1,137 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: 
nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsCOMPtr.h" +#include "nsCycleCollectionParticipant.h" +#include "nsGkAtoms.h" + +#include "mozilla/dom/SpeechSynthesisEvent.h" +#include "mozilla/dom/SpeechSynthesisUtteranceBinding.h" +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" + +#include <stdlib.h> + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_INHERITED(SpeechSynthesisUtterance, + DOMEventTargetHelper, mVoice); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesisUtterance) +NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) + +NS_IMPL_ADDREF_INHERITED(SpeechSynthesisUtterance, DOMEventTargetHelper) +NS_IMPL_RELEASE_INHERITED(SpeechSynthesisUtterance, DOMEventTargetHelper) + +SpeechSynthesisUtterance::SpeechSynthesisUtterance( + nsPIDOMWindowInner* aOwnerWindow, const nsAString& text) + : DOMEventTargetHelper(aOwnerWindow), + mText(text), + mVolume(1), + mRate(1), + mPitch(1), + mPaused(false), + mShouldResistFingerprinting( + aOwnerWindow->AsGlobal()->ShouldResistFingerprinting( + RFPTarget::SpeechSynthesis)) {} + +SpeechSynthesisUtterance::~SpeechSynthesisUtterance() = default; + +JSObject* SpeechSynthesisUtterance::WrapObject( + JSContext* aCx, JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesisUtterance_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* SpeechSynthesisUtterance::GetParentObject() const { + return GetOwner(); +} + +already_AddRefed<SpeechSynthesisUtterance> +SpeechSynthesisUtterance::Constructor(GlobalObject& aGlobal, ErrorResult& aRv) { + return Constructor(aGlobal, u""_ns, aRv); +} + +already_AddRefed<SpeechSynthesisUtterance> +SpeechSynthesisUtterance::Constructor(GlobalObject& aGlobal, + const nsAString& aText, + ErrorResult& aRv) { + 
nsCOMPtr<nsPIDOMWindowInner> win = do_QueryInterface(aGlobal.GetAsSupports()); + + if (!win) { + aRv.Throw(NS_ERROR_FAILURE); + return nullptr; + } + + RefPtr<SpeechSynthesisUtterance> object = + new SpeechSynthesisUtterance(win, aText); + return object.forget(); +} + +void SpeechSynthesisUtterance::GetText(nsString& aResult) const { + aResult = mText; +} + +void SpeechSynthesisUtterance::SetText(const nsAString& aText) { + mText = aText; +} + +void SpeechSynthesisUtterance::GetLang(nsString& aResult) const { + aResult = mLang; +} + +void SpeechSynthesisUtterance::SetLang(const nsAString& aLang) { + mLang = aLang; +} + +SpeechSynthesisVoice* SpeechSynthesisUtterance::GetVoice() const { + return mVoice; +} + +void SpeechSynthesisUtterance::SetVoice(SpeechSynthesisVoice* aVoice) { + mVoice = aVoice; +} + +float SpeechSynthesisUtterance::Volume() const { return mVolume; } + +void SpeechSynthesisUtterance::SetVolume(float aVolume) { + mVolume = std::max<float>(std::min<float>(aVolume, 1), 0); +} + +float SpeechSynthesisUtterance::Rate() const { return mRate; } + +void SpeechSynthesisUtterance::SetRate(float aRate) { + mRate = std::max<float>(std::min<float>(aRate, 10), 0.1f); +} + +float SpeechSynthesisUtterance::Pitch() const { return mPitch; } + +void SpeechSynthesisUtterance::SetPitch(float aPitch) { + mPitch = std::max<float>(std::min<float>(aPitch, 2), 0); +} + +void SpeechSynthesisUtterance::GetChosenVoiceURI(nsString& aResult) const { + aResult = mChosenVoiceURI; +} + +void SpeechSynthesisUtterance::DispatchSpeechSynthesisEvent( + const nsAString& aEventType, uint32_t aCharIndex, + const Nullable<uint32_t>& aCharLength, float aElapsedTime, + const nsAString& aName) { + SpeechSynthesisEventInit init; + init.mBubbles = false; + init.mCancelable = false; + init.mUtterance = this; + init.mCharIndex = aCharIndex; + init.mCharLength = aCharLength; + init.mElapsedTime = aElapsedTime; + init.mName = aName; + + RefPtr<SpeechSynthesisEvent> event = + 
SpeechSynthesisEvent::Constructor(this, aEventType, init); + DispatchTrustedEvent(event); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/SpeechSynthesisUtterance.h b/dom/media/webspeech/synth/SpeechSynthesisUtterance.h new file mode 100644 index 0000000000..17958a3b32 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisUtterance.h @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisUtterance_h +#define mozilla_dom_SpeechSynthesisUtterance_h + +#include "mozilla/DOMEventTargetHelper.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "js/TypeDecls.h" + +#include "nsSpeechTask.h" + +namespace mozilla::dom { + +class SpeechSynthesisVoice; +class SpeechSynthesis; +class nsSynthVoiceRegistry; + +class SpeechSynthesisUtterance final : public DOMEventTargetHelper { + friend class SpeechSynthesis; + friend class nsSpeechTask; + friend class nsSynthVoiceRegistry; + + public: + SpeechSynthesisUtterance(nsPIDOMWindowInner* aOwnerWindow, + const nsAString& aText); + + NS_DECL_ISUPPORTS_INHERITED + NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechSynthesisUtterance, + DOMEventTargetHelper) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + static already_AddRefed<SpeechSynthesisUtterance> Constructor( + GlobalObject& aGlobal, ErrorResult& aRv); + static already_AddRefed<SpeechSynthesisUtterance> Constructor( + GlobalObject& aGlobal, const nsAString& aText, ErrorResult& aRv); + + void GetText(nsString& aResult) const; + + void SetText(const nsAString& aText); + + void GetLang(nsString& aResult) const; + + void SetLang(const 
nsAString& aLang); + + SpeechSynthesisVoice* GetVoice() const; + + void SetVoice(SpeechSynthesisVoice* aVoice); + + float Volume() const; + + void SetVolume(float aVolume); + + float Rate() const; + + void SetRate(float aRate); + + float Pitch() const; + + void SetPitch(float aPitch); + + void GetChosenVoiceURI(nsString& aResult) const; + + bool IsPaused() { return mPaused; } + + bool ShouldResistFingerprinting() const { + return mShouldResistFingerprinting; + } + + IMPL_EVENT_HANDLER(start) + IMPL_EVENT_HANDLER(end) + IMPL_EVENT_HANDLER(error) + IMPL_EVENT_HANDLER(pause) + IMPL_EVENT_HANDLER(resume) + IMPL_EVENT_HANDLER(mark) + IMPL_EVENT_HANDLER(boundary) + + private: + virtual ~SpeechSynthesisUtterance(); + + void DispatchSpeechSynthesisEvent(const nsAString& aEventType, + uint32_t aCharIndex, + const Nullable<uint32_t>& aCharLength, + float aElapsedTime, const nsAString& aName); + + nsString mText; + + nsString mLang; + + float mVolume; + + float mRate; + + float mPitch; + + nsString mChosenVoiceURI; + + bool mPaused; + + RefPtr<SpeechSynthesisVoice> mVoice; + + bool mShouldResistFingerprinting; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp b/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp new file mode 100644 index 0000000000..a309daca26 --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisVoice.cpp @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechSynthesis.h" +#include "nsSynthVoiceRegistry.h" +#include "mozilla/dom/SpeechSynthesisVoiceBinding.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechSynthesisVoice, mParent) +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechSynthesisVoice) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechSynthesisVoice) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesisVoice) + NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY + NS_INTERFACE_MAP_ENTRY(nsISupports) +NS_INTERFACE_MAP_END + +SpeechSynthesisVoice::SpeechSynthesisVoice(nsISupports* aParent, + const nsAString& aUri) + : mParent(aParent), mUri(aUri) {} + +SpeechSynthesisVoice::~SpeechSynthesisVoice() = default; + +JSObject* SpeechSynthesisVoice::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return SpeechSynthesisVoice_Binding::Wrap(aCx, this, aGivenProto); +} + +nsISupports* SpeechSynthesisVoice::GetParentObject() const { return mParent; } + +void SpeechSynthesisVoice::GetVoiceURI(nsString& aRetval) const { + aRetval = mUri; +} + +void SpeechSynthesisVoice::GetName(nsString& aRetval) const { + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceName(mUri, aRetval); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.name"); +} + +void SpeechSynthesisVoice::GetLang(nsString& aRetval) const { + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->GetVoiceLang(mUri, aRetval); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.lang"); +} + +bool SpeechSynthesisVoice::LocalService() const { + bool isLocal; + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->IsLocalVoice(mUri, &isLocal); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.localService"); + + return isLocal; +} + +bool SpeechSynthesisVoice::Default() const { + bool isDefault; + DebugOnly<nsresult> rv = + nsSynthVoiceRegistry::GetInstance()->IsDefaultVoice(mUri, &isDefault); + 
NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Failed to get SpeechSynthesisVoice.default"); + + return isDefault; +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/SpeechSynthesisVoice.h b/dom/media/webspeech/synth/SpeechSynthesisVoice.h new file mode 100644 index 0000000000..079e5f49ea --- /dev/null +++ b/dom/media/webspeech/synth/SpeechSynthesisVoice.h @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisVoice_h +#define mozilla_dom_SpeechSynthesisVoice_h + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsWrapperCache.h" +#include "js/TypeDecls.h" + +namespace mozilla::dom { + +class nsSynthVoiceRegistry; +class SpeechSynthesis; + +class SpeechSynthesisVoice final : public nsISupports, public nsWrapperCache { + friend class nsSynthVoiceRegistry; + friend class SpeechSynthesis; + + public: + SpeechSynthesisVoice(nsISupports* aParent, const nsAString& aUri); + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(SpeechSynthesisVoice) + + nsISupports* GetParentObject() const; + + JSObject* WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) override; + + void GetVoiceURI(nsString& aRetval) const; + + void GetName(nsString& aRetval) const; + + void GetLang(nsString& aRetval) const; + + bool LocalService() const; + + bool Default() const; + + private: + virtual ~SpeechSynthesisVoice(); + + nsCOMPtr<nsISupports> mParent; + + nsString mUri; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/android/SpeechSynthesisService.cpp b/dom/media/webspeech/synth/android/SpeechSynthesisService.cpp new file mode 100644 index 0000000000..1b6e4b6125 --- 
/dev/null +++ b/dom/media/webspeech/synth/android/SpeechSynthesisService.cpp @@ -0,0 +1,215 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechSynthesisService.h" + +#include <android/log.h> + +#include "nsXULAppAPI.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/jni/Utils.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_media.h" + +#define ALOG(args...) \ + __android_log_print(ANDROID_LOG_INFO, "GeckoSpeechSynthesis", ##args) + +namespace mozilla { +namespace dom { + +StaticRefPtr<SpeechSynthesisService> SpeechSynthesisService::sSingleton; + +class AndroidSpeechCallback final : public nsISpeechTaskCallback { + public: + AndroidSpeechCallback() {} + + NS_DECL_ISUPPORTS + + NS_IMETHOD OnResume() override { return NS_OK; } + + NS_IMETHOD OnPause() override { return NS_OK; } + + NS_IMETHOD OnCancel() override { + java::SpeechSynthesisService::Stop(); + return NS_OK; + } + + NS_IMETHOD OnVolumeChanged(float aVolume) override { return NS_OK; } + + private: + ~AndroidSpeechCallback() {} +}; + +NS_IMPL_ISUPPORTS(AndroidSpeechCallback, nsISpeechTaskCallback) + +NS_IMPL_ISUPPORTS(SpeechSynthesisService, nsISpeechService) + +void SpeechSynthesisService::Setup() { + ALOG("SpeechSynthesisService::Setup"); + + if (!StaticPrefs::media_webspeech_synth_enabled() || + Preferences::GetBool("media.webspeech.synth.test")) { + return; + } + + if (!jni::IsAvailable()) { + NS_WARNING("Failed to initialize speech synthesis"); + return; + } + + Init(); + java::SpeechSynthesisService::InitSynth(); +} + +// nsISpeechService + +NS_IMETHODIMP +SpeechSynthesisService::Speak(const nsAString& aText, const nsAString& aUri, + 
float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) { + if (mTask) { + NS_WARNING("Service only supports one speech task at a time."); + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<AndroidSpeechCallback> callback = new AndroidSpeechCallback(); + nsresult rv = aTask->Setup(callback); + + if (NS_FAILED(rv)) { + return rv; + } + + jni::String::LocalRef utteranceId = + java::SpeechSynthesisService::Speak(aUri, aText, aRate, aPitch, aVolume); + if (!utteranceId) { + return NS_ERROR_NOT_AVAILABLE; + } + + mTaskUtteranceId = utteranceId->ToCString(); + mTask = aTask; + mTaskTextLength = aText.Length(); + mTaskTextOffset = 0; + + return NS_OK; +} + +SpeechSynthesisService* SpeechSynthesisService::GetInstance(bool aCreate) { + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT( + false, + "SpeechSynthesisService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton && aCreate) { + sSingleton = new SpeechSynthesisService(); + sSingleton->Setup(); + ClearOnShutdown(&sSingleton); + } + + return sSingleton; +} + +already_AddRefed<SpeechSynthesisService> +SpeechSynthesisService::GetInstanceForService() { + MOZ_ASSERT(NS_IsMainThread()); + RefPtr<SpeechSynthesisService> sapiService = GetInstance(); + return sapiService.forget(); +} + +// JNI + +void SpeechSynthesisService::RegisterVoice(jni::String::Param aUri, + jni::String::Param aName, + jni::String::Param aLocale, + bool aIsNetwork, bool aIsDefault) { + nsSynthVoiceRegistry* registry = nsSynthVoiceRegistry::GetInstance(); + SpeechSynthesisService* service = SpeechSynthesisService::GetInstance(false); + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. 
+ DebugOnly<nsresult> rv = + registry->AddVoice(service, aUri->ToString(), aName->ToString(), + aLocale->ToString(), !aIsNetwork, true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); + + if (aIsDefault) { + DebugOnly<nsresult> rv = registry->SetDefaultVoice(aUri->ToString(), true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to set voice as default"); + } +} + +void SpeechSynthesisService::DoneRegisteringVoices() { + nsSynthVoiceRegistry* registry = nsSynthVoiceRegistry::GetInstance(); + registry->NotifyVoicesChanged(); +} + +void SpeechSynthesisService::DispatchStart(jni::String::Param aUtteranceId) { + if (sSingleton) { + MOZ_ASSERT(sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + if (task) { + sSingleton->mTaskStartTime = TimeStamp::Now(); + DebugOnly<nsresult> rv = task->DispatchStart(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch start"); + } + } +} + +void SpeechSynthesisService::DispatchEnd(jni::String::Param aUtteranceId) { + if (sSingleton) { + // In API older than 23, we will sometimes call this function + // without providing an utterance ID. 
+ MOZ_ASSERT(!aUtteranceId || + sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + sSingleton->mTask = nullptr; + if (task) { + TimeStamp startTime = sSingleton->mTaskStartTime; + DebugOnly<nsresult> rv = + task->DispatchEnd((TimeStamp::Now() - startTime).ToSeconds(), + sSingleton->mTaskTextLength); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch start"); + } + } +} + +void SpeechSynthesisService::DispatchError(jni::String::Param aUtteranceId) { + if (sSingleton) { + MOZ_ASSERT(sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + sSingleton->mTask = nullptr; + if (task) { + TimeStamp startTime = sSingleton->mTaskStartTime; + DebugOnly<nsresult> rv = + task->DispatchError((TimeStamp::Now() - startTime).ToSeconds(), + sSingleton->mTaskTextOffset); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch start"); + } + } +} + +void SpeechSynthesisService::DispatchBoundary(jni::String::Param aUtteranceId, + int32_t aStart, int32_t aEnd) { + if (sSingleton) { + MOZ_ASSERT(sSingleton->mTaskUtteranceId.Equals(aUtteranceId->ToCString())); + nsCOMPtr<nsISpeechTask> task = sSingleton->mTask; + if (task) { + TimeStamp startTime = sSingleton->mTaskStartTime; + sSingleton->mTaskTextOffset = aStart; + DebugOnly<nsresult> rv = task->DispatchBoundary( + u"word"_ns, (TimeStamp::Now() - startTime).ToSeconds(), aStart, + aEnd - aStart, 1); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to dispatch boundary"); + } + } +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/android/SpeechSynthesisService.h b/dom/media/webspeech/synth/android/SpeechSynthesisService.h new file mode 100644 index 0000000000..98c5143cf6 --- /dev/null +++ b/dom/media/webspeech/synth/android/SpeechSynthesisService.h @@ -0,0 +1,68 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set 
ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechSynthesisService_h +#define mozilla_dom_SpeechSynthesisService_h + +#include "nsISpeechService.h" +#include "mozilla/java/SpeechSynthesisServiceNatives.h" +#include "mozilla/StaticPtr.h" + +namespace mozilla { +namespace dom { + +class SpeechSynthesisService final + : public nsISpeechService, + public java::SpeechSynthesisService::Natives<SpeechSynthesisService> { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + + SpeechSynthesisService(){}; + + void Setup(); + + static void DoneRegisteringVoices(); + + static void RegisterVoice(jni::String::Param aUri, jni::String::Param aName, + jni::String::Param aLocale, bool aIsNetwork, + bool aIsDefault); + + static void DispatchStart(jni::String::Param aUtteranceId); + + static void DispatchEnd(jni::String::Param aUtteranceId); + + static void DispatchError(jni::String::Param aUtteranceId); + + static void DispatchBoundary(jni::String::Param aUtteranceId, int32_t aStart, + int32_t aEnd); + + static SpeechSynthesisService* GetInstance(bool aCreate = true); + static already_AddRefed<SpeechSynthesisService> GetInstanceForService(); + + static StaticRefPtr<SpeechSynthesisService> sSingleton; + + private: + virtual ~SpeechSynthesisService(){}; + + nsCOMPtr<nsISpeechTask> mTask; + + // Unique ID assigned to utterance when it is sent to system service. + nsCString mTaskUtteranceId; + + // Time stamp from the moment the utterance is started. + TimeStamp mTaskStartTime; + + // Length of text of the utterance. + uint32_t mTaskTextLength; + + // Current offset in characters of what has been spoken. 
+ uint32_t mTaskTextOffset; +}; + +} // namespace dom +} // namespace mozilla +#endif diff --git a/dom/media/webspeech/synth/android/components.conf b/dom/media/webspeech/synth/android/components.conf new file mode 100644 index 0000000000..4c35954fcc --- /dev/null +++ b/dom/media/webspeech/synth/android/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{311b2dab-f4d3-4be4-8123-6732313d95c2}', + 'contract_ids': ['@mozilla.org/androidspeechsynth;1'], + 'singleton': True, + 'type': 'mozilla::dom::SpeechSynthesisService', + 'headers': ['/dom/media/webspeech/synth/android/SpeechSynthesisService.h'], + 'constructor': 'mozilla::dom::SpeechSynthesisService::GetInstanceForService', + 'categories': {"speech-synth-started": 'Android Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/android/moz.build b/dom/media/webspeech/synth/android/moz.build new file mode 100644 index 0000000000..348c157f3c --- /dev/null +++ b/dom/media/webspeech/synth/android/moz.build @@ -0,0 +1,19 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +EXPORTS.mozilla.dom += ["SpeechSynthesisService.h"] + +UNIFIED_SOURCES += [ + "SpeechSynthesisService.cpp", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h new file mode 100644 index 0000000000..6148d59c92 --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_OsxSpeechSynthesizerService_h +#define mozilla_dom_OsxSpeechSynthesizerService_h + +#include "nsISpeechService.h" +#include "nsIObserver.h" +#include "mozilla/StaticPtr.h" + +namespace mozilla { +namespace dom { + +class OSXSpeechSynthesizerService final : public nsISpeechService, + public nsIObserver { + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSISPEECHSERVICE + NS_DECL_NSIOBSERVER + + bool Init(); + + static OSXSpeechSynthesizerService* GetInstance(); + static already_AddRefed<OSXSpeechSynthesizerService> GetInstanceForService(); + + private: + OSXSpeechSynthesizerService(); + virtual ~OSXSpeechSynthesizerService() = default; + + bool RegisterVoices(); + + bool mInitialized; + static mozilla::StaticRefPtr<OSXSpeechSynthesizerService> sSingleton; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm new file mode 100644 index 0000000000..a815c68644 --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm @@ -0,0 +1,431 
@@ +/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 sw=2 et tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.h" +#include "nsServiceManagerUtils.h" +#include "nsObjCExceptions.h" +#include "nsCocoaUtils.h" +#include "nsIThread.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/Assertions.h" +#include "OSXSpeechSynthesizerService.h" + +#import <Cocoa/Cocoa.h> + +@class SpeechDelegate; + +// We can escape the default delimiters ("[[" and "]]") by temporarily +// changing the delimiters just before they appear, and changing them back +// just after. 
+#define DLIM_ESCAPE_START "[[dlim (( ))]]" +#define DLIM_ESCAPE_END "((dlim [[ ]]))" + +using namespace mozilla; + +class SpeechTaskCallback final : public nsISpeechTaskCallback { + public: + SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth, + const nsTArray<size_t>& aOffsets); + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength); + void OnError(uint32_t aIndex); + void OnDidFinishSpeaking(); + + private: + virtual ~SpeechTaskCallback(); + + float GetTimeDurationFromStart(); + + nsCOMPtr<nsISpeechTask> mTask; + NSSpeechSynthesizer* mSpeechSynthesizer; + SpeechDelegate* mDelegate; + TimeStamp mStartingTime; + uint32_t mCurrentIndex; + nsTArray<size_t> mOffsets; +}; + +@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> { + @private + SpeechTaskCallback* mCallback; +} + +- (id)initWithCallback:(SpeechTaskCallback*)aCallback; +@end + +@implementation SpeechDelegate +- (id)initWithCallback:(SpeechTaskCallback*)aCallback { + [super init]; + mCallback = aCallback; + return self; +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender + willSpeakWord:(NSRange)aRange + ofString:(NSString*)aString { + mCallback->OnWillSpeakWord(aRange.location, aRange.length); +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender didFinishSpeaking:(BOOL)aFinishedSpeaking { + mCallback->OnDidFinishSpeaking(); +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender + didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex + ofString:(NSString*)aString + message:(NSString*)aMessage { + mCallback->OnError(aCharacterIndex); +} +@end + +NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) 
+NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback) + +SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth, + const nsTArray<size_t>& aOffsets) + : mTask(aTask), mSpeechSynthesizer(aSynth), mCurrentIndex(0), mOffsets(aOffsets.Clone()) { + mDelegate = [[SpeechDelegate alloc] initWithCallback:this]; + [mSpeechSynthesizer setDelegate:mDelegate]; + mStartingTime = TimeStamp::Now(); +} + +SpeechTaskCallback::~SpeechTaskCallback() { + [mSpeechSynthesizer setDelegate:nil]; + [mDelegate release]; + [mSpeechSynthesizer release]; +} + +NS_IMETHODIMP +SpeechTaskCallback::OnCancel() { + NS_OBJC_BEGIN_TRY_BLOCK_RETURN; + + [mSpeechSynthesizer stopSpeaking]; + return NS_OK; + + NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE); +} + +NS_IMETHODIMP +SpeechTaskCallback::OnPause() { + NS_OBJC_BEGIN_TRY_BLOCK_RETURN; + + [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary]; + if (!mTask) { + // When calling pause() on child process, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; + + NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE); +} + +NS_IMETHODIMP +SpeechTaskCallback::OnResume() { + NS_OBJC_BEGIN_TRY_BLOCK_RETURN; + + [mSpeechSynthesizer continueSpeaking]; + if (!mTask) { + // When calling resume() on child process, it may not receive end event + // from chrome process yet. 
+ return NS_ERROR_FAILURE; + } + mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; + + NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE); +} + +NS_IMETHODIMP +SpeechTaskCallback::OnVolumeChanged(float aVolume) { + NS_OBJC_BEGIN_TRY_BLOCK_RETURN; + + [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume] + forProperty:NSSpeechVolumeProperty + error:nil]; + return NS_OK; + + NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE); +} + +float SpeechTaskCallback::GetTimeDurationFromStart() { + TimeDuration duration = TimeStamp::Now() - mStartingTime; + return duration.ToSeconds(); +} + +void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) { + mCurrentIndex = aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex; + if (!mTask) { + return; + } + mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(), mCurrentIndex, aLength, 1); +} + +void SpeechTaskCallback::OnError(uint32_t aIndex) { + if (!mTask) { + return; + } + mTask->DispatchError(GetTimeDurationFromStart(), aIndex); +} + +void SpeechTaskCallback::OnDidFinishSpeaking() { + mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex); + // no longer needed + [mSpeechSynthesizer setDelegate:nil]; + mTask = nullptr; +} + +namespace mozilla { +namespace dom { + +struct OSXVoice { + OSXVoice() : mIsDefault(false) {} + + nsString mUri; + nsString mName; + nsString mLocale; + bool mIsDefault; +}; + +class RegisterVoicesRunnable final : public Runnable { + public: + RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService, nsTArray<OSXVoice>& aList) + : Runnable("RegisterVoicesRunnable"), mSpeechService(aSpeechService), mVoices(aList) {} + + NS_IMETHOD Run() override; + + private: + ~RegisterVoicesRunnable() override = default; + + // This runnable always use sync mode. 
It is unnecessary to reference the object + OSXSpeechSynthesizerService* mSpeechService; + nsTArray<OSXVoice>& mVoices; +}; + +NS_IMETHODIMP +RegisterVoicesRunnable::Run() { + nsresult rv; + nsCOMPtr<nsISynthVoiceRegistry> registry = do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv); + if (!registry) { + return rv; + } + + // const ref: OSXVoice holds four nsStrings, so copying per iteration is wasteful. + for (const OSXVoice& voice : mVoices) { + rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName, voice.mLocale, true, false); + if (NS_WARN_IF(NS_FAILED(rv))) { + continue; + } + + if (voice.mIsDefault) { + registry->SetDefaultVoice(voice.mUri, true); + } + } + + registry->NotifyVoicesChanged(); + + return NS_OK; +} + +class EnumVoicesRunnable final : public Runnable { + public: + explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService) + : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {} + + NS_IMETHOD Run() override; + + private: + ~EnumVoicesRunnable() override = default; + + RefPtr<OSXSpeechSynthesizerService> mSpeechService; +}; + +NS_IMETHODIMP +EnumVoicesRunnable::Run() { + NS_OBJC_BEGIN_TRY_BLOCK_RETURN; + + AutoTArray<OSXVoice, 64> list; + + NSArray* voices = [NSSpeechSynthesizer availableVoices]; + NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice]; + + for (NSString* voice in voices) { + OSXVoice item; + + NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice]; + + nsAutoString identifier; + nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier], identifier); + + nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName], item.mName); + + nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale); + item.mLocale.ReplaceChar('_', '-'); + + item.mUri.AssignLiteral("urn:moz-tts:osx:"); + item.mUri.Append(identifier); + + if ([voice isEqualToString:defaultVoice]) { + item.mIsDefault = true; + } + + list.AppendElement(item); + } + + RefPtr<RegisterVoicesRunnable> runnable = new RegisterVoicesRunnable(mSpeechService, list); + 
NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns, + GetMainThreadSerialEventTarget(), runnable.forget()); + + return NS_OK; + + NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE); +} + +StaticRefPtr<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::sSingleton; + +NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(OSXSpeechSynthesizerService) +NS_IMPL_RELEASE(OSXSpeechSynthesizerService) + +OSXSpeechSynthesizerService::OSXSpeechSynthesizerService() : mInitialized(false) {} + +bool OSXSpeechSynthesizerService::Init() { + if (Preferences::GetBool("media.webspeech.synth.test") || + !StaticPrefs::media_webspeech_synth_enabled()) { + // When test is enabled, we shouldn't add OS backend (Bug 1160844) + return false; + } + + nsCOMPtr<nsIThread> thread; + if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) { + return false; + } + + // Get all the voices and register in the SynthVoiceRegistry + nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this); + thread->Dispatch(runnable, NS_DISPATCH_NORMAL); + + mInitialized = true; + return true; +} + +NS_IMETHODIMP +OSXSpeechSynthesizerService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume, + float aRate, float aPitch, nsISpeechTask* aTask) { + NS_OBJC_BEGIN_TRY_BLOCK_RETURN; + + MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns), + "OSXSpeechSynthesizerService doesn't allow this voice URI"); + + NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init]; + // strlen("urn:moz-tts:osx:") == 16 + NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16)); + [synth setVoice:identifier]; + + // default rate is 180-220 + [synth setObject:[NSNumber numberWithInt:aRate * 200] forProperty:NSSpeechRateProperty error:nil]; + // volume allows 0.0-1.0 + [synth 
setObject:[NSNumber numberWithFloat:aVolume] forProperty:NSSpeechVolumeProperty error:nil]; + // Use default pitch value to calculate this + NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty error:nil]; + if (defaultPitch) { + int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5); + [synth setObject:[NSNumber numberWithInt:newPitch] + forProperty:NSSpeechPitchBaseProperty + error:nil]; + } + + nsAutoString escapedText; + // We need to map the offsets from the given text to the escaped text. + // The index of the offsets array is the position in the escaped text, + // the element value is the position in the user-supplied text. + nsTArray<size_t> offsets; + offsets.SetCapacity(aText.Length()); + + // This loop looks for occurrences of "[[" or "]]", escapes them, and + // populates the offsets array to supply a map to the original offsets. + for (size_t i = 0; i < aText.Length(); i++) { + if (aText.Length() > i + 1 && + ((aText[i] == ']' && aText[i + 1] == ']') || (aText[i] == '[' && aText[i + 1] == '['))) { + escapedText.AppendLiteral(DLIM_ESCAPE_START); + offsets.AppendElements(strlen(DLIM_ESCAPE_START)); + escapedText.Append(aText[i]); + offsets.AppendElement(i); + escapedText.Append(aText[++i]); + offsets.AppendElement(i); + escapedText.AppendLiteral(DLIM_ESCAPE_END); + offsets.AppendElements(strlen(DLIM_ESCAPE_END)); + } else { + escapedText.Append(aText[i]); + offsets.AppendElement(i); + } + } + + RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets); + nsresult rv = aTask->Setup(callback); + NS_ENSURE_SUCCESS(rv, rv); + + NSString* text = nsCocoaUtils::ToNSString(escapedText); + BOOL success = [synth startSpeakingString:text]; + NS_ENSURE_TRUE(success, NS_ERROR_FAILURE); + + aTask->DispatchStart(); + return NS_OK; + + NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE); +} + +NS_IMETHODIMP +OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + 
return NS_OK; +} + +OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + if (XRE_GetProcessType() != GeckoProcessType_Default) { + return nullptr; + } + + if (!sSingleton) { + RefPtr<OSXSpeechSynthesizerService> speechService = new OSXSpeechSynthesizerService(); + if (speechService->Init()) { + sSingleton = speechService; + ClearOnShutdown(&sSingleton); + } + } + return sSingleton; +} + +already_AddRefed<OSXSpeechSynthesizerService> OSXSpeechSynthesizerService::GetInstanceForService() { + RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance(); + return speechService.forget(); +} + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/cocoa/components.conf b/dom/media/webspeech/synth/cocoa/components.conf new file mode 100644 index 0000000000..c9b0fa5ef0 --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +Classes = [ + { + 'cid': '{914e73b4-6337-4bef-97f3-4d069e053a12}', + 'contract_ids': ['@mozilla.org/synthsystem;1'], + 'singleton': True, + 'type': 'mozilla::dom::OSXSpeechSynthesizerService', + 'headers': ['/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.h'], + 'constructor': 'mozilla::dom::OSXSpeechSynthesizerService::GetInstanceForService', + 'categories': {"speech-synth-started": 'OSX Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/cocoa/moz.build b/dom/media/webspeech/synth/cocoa/moz.build new file mode 100644 index 0000000000..4d59f7a389 --- /dev/null +++ b/dom/media/webspeech/synth/cocoa/moz.build @@ -0,0 +1,15 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += [ + "OSXSpeechSynthesizerService.mm", +] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +FINAL_LIBRARY = "xul" diff --git a/dom/media/webspeech/synth/crashtests/1230428.html b/dom/media/webspeech/synth/crashtests/1230428.html new file mode 100644 index 0000000000..40fa000710 --- /dev/null +++ b/dom/media/webspeech/synth/crashtests/1230428.html @@ -0,0 +1,32 @@ +<!DOCTYPE html> +<html class="reftest-wait"> +<head> +<meta charset="utf-8"> +<script type="application/javascript"> +function f() +{ + if (speechSynthesis.getVoices().length == 0) { + // No synthesis backend to test this + document.documentElement.removeAttribute('class'); + return; + } + + var s = new SpeechSynthesisUtterance("hello world"); + s.onerror = () => { + // No synthesis backend to test this + document.documentElement.removeAttribute('class'); + return; + } + s.onend = () => { + document.documentElement.removeAttribute('class'); + }; + speechSynthesis.speak(s); + speechSynthesis.cancel(); + speechSynthesis.pause(); + speechSynthesis.resume(); +} 
+ </script> +</head> +<body onload="f();"> +</body> +</html> diff --git a/dom/media/webspeech/synth/crashtests/crashtests.list b/dom/media/webspeech/synth/crashtests/crashtests.list new file mode 100644 index 0000000000..07e931c929 --- /dev/null +++ b/dom/media/webspeech/synth/crashtests/crashtests.list @@ -0,0 +1 @@ +skip-if(!cocoaWidget) pref(media.webspeech.synth.enabled,true) load 1230428.html # bug 1230428 diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl new file mode 100644 index 0000000000..38e360bf4c --- /dev/null +++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl @@ -0,0 +1,50 @@ +/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 40 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +include protocol PContent; +include protocol PSpeechSynthesisRequest; + +namespace mozilla { +namespace dom { + +struct RemoteVoice { + nsString voiceURI; + nsString name; + nsString lang; + bool localService; + bool queued; +}; + +[ManualDealloc] +sync protocol PSpeechSynthesis +{ + manager PContent; + manages PSpeechSynthesisRequest; + +child: + + async VoiceAdded(RemoteVoice aVoice); + + async VoiceRemoved(nsString aUri); + + async SetDefaultVoice(nsString aUri, bool aIsDefault); + + async IsSpeakingChanged(bool aIsSpeaking); + + async NotifyVoicesChanged(); + + async InitialVoicesAndState(RemoteVoice[] aVoices, nsString[] aDefaults, + bool aIsSpeaking); + +parent: + async __delete__(); + + async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang, + float aVolume, float aRate, float aPitch, bool aShouldResistFingerprinting); +}; + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl 
b/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl new file mode 100644 index 0000000000..8543eebc5b --- /dev/null +++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesisRequest.ipdl @@ -0,0 +1,48 @@ +/* -*- Mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 40 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +include protocol PSpeechSynthesis; + +namespace mozilla { +namespace dom { + +[ManualDealloc, ChildImpl=virtual, ParentImpl=virtual] +async protocol PSpeechSynthesisRequest +{ + manager PSpeechSynthesis; + + parent: + + async __delete__(); + + async Pause(); + + async Resume(); + + async Cancel(); + + async ForceEnd(); + + async SetAudioOutputVolume(float aVolume); + + child: + + async OnEnd(bool aIsError, float aElapsedTime, uint32_t aCharIndex); + + async OnStart(nsString aUri); + + async OnPause(float aElapsedTime, uint32_t aCharIndex); + + async OnResume(float aElapsedTime, uint32_t aCharIndex); + + async OnBoundary(nsString aName, float aElapsedTime, uint32_t aCharIndex, + uint32_t aCharLength, uint8_t argc); + + async OnMark(nsString aName, float aElapsedTime, uint32_t aCharIndex); +}; + +} // namespace dom +} // namespace mozilla diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp new file mode 100644 index 0000000000..9a9e9b6fe2 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp @@ -0,0 +1,169 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechSynthesisChild.h" +#include "nsSynthVoiceRegistry.h" + +namespace mozilla::dom { + +SpeechSynthesisChild::SpeechSynthesisChild() { + MOZ_COUNT_CTOR(SpeechSynthesisChild); +} + +SpeechSynthesisChild::~SpeechSynthesisChild() { + MOZ_COUNT_DTOR(SpeechSynthesisChild); +} + +mozilla::ipc::IPCResult SpeechSynthesisChild::RecvInitialVoicesAndState( + nsTArray<RemoteVoice>&& aVoices, nsTArray<nsString>&& aDefaults, + const bool& aIsSpeaking) { + nsSynthVoiceRegistry::RecvInitialVoicesAndState(aVoices, aDefaults, + aIsSpeaking); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisChild::RecvVoiceAdded( + const RemoteVoice& aVoice) { + nsSynthVoiceRegistry::RecvAddVoice(aVoice); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisChild::RecvVoiceRemoved( + const nsAString& aUri) { + nsSynthVoiceRegistry::RecvRemoveVoice(aUri); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisChild::RecvSetDefaultVoice( + const nsAString& aUri, const bool& aIsDefault) { + nsSynthVoiceRegistry::RecvSetDefaultVoice(aUri, aIsDefault); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisChild::RecvIsSpeakingChanged( + const bool& aIsSpeaking) { + nsSynthVoiceRegistry::RecvIsSpeakingChanged(aIsSpeaking); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisChild::RecvNotifyVoicesChanged() { + nsSynthVoiceRegistry::RecvNotifyVoicesChanged(); + return IPC_OK(); +} + +PSpeechSynthesisRequestChild* +SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild( + const nsAString& aText, const nsAString& aLang, const nsAString& aUri, + const float& aVolume, const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) { + MOZ_CRASH("Caller is supposed to manually construct a request!"); +} + +bool SpeechSynthesisChild::DeallocPSpeechSynthesisRequestChild( + PSpeechSynthesisRequestChild* aActor) { + delete aActor; + return true; +} + +// SpeechSynthesisRequestChild + 
+SpeechSynthesisRequestChild::SpeechSynthesisRequestChild(SpeechTaskChild* aTask) + : mTask(aTask) { + mTask->mActor = this; + MOZ_COUNT_CTOR(SpeechSynthesisRequestChild); +} + +SpeechSynthesisRequestChild::~SpeechSynthesisRequestChild() { + MOZ_COUNT_DTOR(SpeechSynthesisRequestChild); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnStart( + const nsAString& aUri) { + mTask->DispatchStartImpl(aUri); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnEnd( + const bool& aIsError, const float& aElapsedTime, + const uint32_t& aCharIndex) { + SpeechSynthesisRequestChild* actor = mTask->mActor; + mTask->mActor = nullptr; + + if (aIsError) { + mTask->DispatchErrorImpl(aElapsedTime, aCharIndex); + } else { + mTask->DispatchEndImpl(aElapsedTime, aCharIndex); + } + + SpeechSynthesisRequestChild::Send__delete__(actor); + + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnPause( + const float& aElapsedTime, const uint32_t& aCharIndex) { + mTask->DispatchPauseImpl(aElapsedTime, aCharIndex); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnResume( + const float& aElapsedTime, const uint32_t& aCharIndex) { + mTask->DispatchResumeImpl(aElapsedTime, aCharIndex); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnBoundary( + const nsAString& aName, const float& aElapsedTime, + const uint32_t& aCharIndex, const uint32_t& aCharLength, + const uint8_t& argc) { + mTask->DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength, + argc); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestChild::RecvOnMark( + const nsAString& aName, const float& aElapsedTime, + const uint32_t& aCharIndex) { + mTask->DispatchMarkImpl(aName, aElapsedTime, aCharIndex); + return IPC_OK(); +} + +// SpeechTaskChild + +SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting) + : 
nsSpeechTask(aUtterance, aShouldResistFingerprinting), mActor(nullptr) {} + +NS_IMETHODIMP +SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback) { + MOZ_CRASH("Should never be called from child"); +} + +void SpeechTaskChild::Pause() { + MOZ_ASSERT(mActor); + mActor->SendPause(); +} + +void SpeechTaskChild::Resume() { + MOZ_ASSERT(mActor); + mActor->SendResume(); +} + +void SpeechTaskChild::Cancel() { + MOZ_ASSERT(mActor); + mActor->SendCancel(); +} + +void SpeechTaskChild::ForceEnd() { + MOZ_ASSERT(mActor); + mActor->SendForceEnd(); +} + +void SpeechTaskChild::SetAudioOutputVolume(float aVolume) { + if (mActor) { + mActor->SendSetAudioOutputVolume(aVolume); + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h new file mode 100644 index 0000000000..f57582932a --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h @@ -0,0 +1,107 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesisChild_h +#define mozilla_dom_SpeechSynthesisChild_h + +#include "mozilla/Attributes.h" +#include "mozilla/dom/PSpeechSynthesisChild.h" +#include "mozilla/dom/PSpeechSynthesisRequestChild.h" +#include "nsSpeechTask.h" + +namespace mozilla::dom { + +class nsSynthVoiceRegistry; +class SpeechSynthesisRequestChild; +class SpeechTaskChild; + +class SpeechSynthesisChild : public PSpeechSynthesisChild { + friend class nsSynthVoiceRegistry; + friend class PSpeechSynthesisChild; + + public: + mozilla::ipc::IPCResult RecvInitialVoicesAndState( + nsTArray<RemoteVoice>&& aVoices, nsTArray<nsString>&& aDefaults, + const bool& aIsSpeaking); + + mozilla::ipc::IPCResult RecvVoiceAdded(const RemoteVoice& aVoice); + + mozilla::ipc::IPCResult RecvVoiceRemoved(const nsAString& aUri); + + mozilla::ipc::IPCResult RecvSetDefaultVoice(const nsAString& aUri, + const bool& aIsDefault); + + mozilla::ipc::IPCResult RecvIsSpeakingChanged(const bool& aIsSpeaking); + + mozilla::ipc::IPCResult RecvNotifyVoicesChanged(); + + protected: + SpeechSynthesisChild(); + virtual ~SpeechSynthesisChild(); + + PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild( + const nsAString& aLang, const nsAString& aUri, const nsAString& aText, + const float& aVolume, const float& aPitch, const float& aRate, + const bool& aShouldResistFingerprinting); + bool DeallocPSpeechSynthesisRequestChild( + PSpeechSynthesisRequestChild* aActor); +}; + +class SpeechSynthesisRequestChild : public PSpeechSynthesisRequestChild { + public: + explicit SpeechSynthesisRequestChild(SpeechTaskChild* aTask); + virtual ~SpeechSynthesisRequestChild(); + + protected: + mozilla::ipc::IPCResult RecvOnStart(const nsAString& aUri) override; + + mozilla::ipc::IPCResult RecvOnEnd(const bool& aIsError, + const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + mozilla::ipc::IPCResult RecvOnPause(const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + 
mozilla::ipc::IPCResult RecvOnResume(const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + mozilla::ipc::IPCResult RecvOnBoundary(const nsAString& aName, + const float& aElapsedTime, + const uint32_t& aCharIndex, + const uint32_t& aCharLength, + const uint8_t& argc) override; + + mozilla::ipc::IPCResult RecvOnMark(const nsAString& aName, + const float& aElapsedTime, + const uint32_t& aCharIndex) override; + + RefPtr<SpeechTaskChild> mTask; +}; + +class SpeechTaskChild : public nsSpeechTask { + friend class SpeechSynthesisRequestChild; + + public: + explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting); + + NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback) override; + + void Pause() override; + + void Resume() override; + + void Cancel() override; + + void ForceEnd() override; + + void SetAudioOutputVolume(float aVolume) override; + + private: + SpeechSynthesisRequestChild* mActor; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp new file mode 100644 index 0000000000..a9eb53c5b7 --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp @@ -0,0 +1,221 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SpeechSynthesisParent.h" +#include "nsSynthVoiceRegistry.h" + +namespace mozilla::dom { + +SpeechSynthesisParent::SpeechSynthesisParent() { + MOZ_COUNT_CTOR(SpeechSynthesisParent); +} + +SpeechSynthesisParent::~SpeechSynthesisParent() { + MOZ_COUNT_DTOR(SpeechSynthesisParent); +} + +void SpeechSynthesisParent::ActorDestroy(ActorDestroyReason aWhy) { + // Implement me! 
Bug 1005141 +} + +bool SpeechSynthesisParent::SendInit() { + return nsSynthVoiceRegistry::GetInstance()->SendInitialVoicesAndState(this); +} + +PSpeechSynthesisRequestParent* +SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent( + const nsAString& aText, const nsAString& aLang, const nsAString& aUri, + const float& aVolume, const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) { + RefPtr<SpeechTaskParent> task = + new SpeechTaskParent(aVolume, aText, aShouldResistFingerprinting); + SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task); + return actor; +} + +bool SpeechSynthesisParent::DeallocPSpeechSynthesisRequestParent( + PSpeechSynthesisRequestParent* aActor) { + delete aActor; + return true; +} + +mozilla::ipc::IPCResult +SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor( + PSpeechSynthesisRequestParent* aActor, const nsAString& aText, + const nsAString& aLang, const nsAString& aUri, const float& aVolume, + const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) { + MOZ_ASSERT(aActor); + SpeechSynthesisRequestParent* actor = + static_cast<SpeechSynthesisRequestParent*>(aActor); + nsSynthVoiceRegistry::GetInstance()->Speak(aText, aLang, aUri, aVolume, aRate, + aPitch, actor->mTask); + return IPC_OK(); +} + +// SpeechSynthesisRequestParent + +SpeechSynthesisRequestParent::SpeechSynthesisRequestParent( + SpeechTaskParent* aTask) + : mTask(aTask) { + mTask->mActor = this; + MOZ_COUNT_CTOR(SpeechSynthesisRequestParent); +} + +SpeechSynthesisRequestParent::~SpeechSynthesisRequestParent() { + if (mTask) { + mTask->mActor = nullptr; + // If we still have a task, cancel it. + mTask->Cancel(); + } + MOZ_COUNT_DTOR(SpeechSynthesisRequestParent); +} + +void SpeechSynthesisRequestParent::ActorDestroy(ActorDestroyReason aWhy) { + // Implement me! 
Bug 1005141 +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvPause() { + MOZ_ASSERT(mTask); + mTask->Pause(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::Recv__delete__() { + MOZ_ASSERT(mTask); + mTask->mActor = nullptr; + mTask = nullptr; + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvResume() { + MOZ_ASSERT(mTask); + mTask->Resume(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvCancel() { + MOZ_ASSERT(mTask); + mTask->Cancel(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvForceEnd() { + MOZ_ASSERT(mTask); + mTask->ForceEnd(); + return IPC_OK(); +} + +mozilla::ipc::IPCResult SpeechSynthesisRequestParent::RecvSetAudioOutputVolume( + const float& aVolume) { + MOZ_ASSERT(mTask); + mTask->SetAudioOutputVolume(aVolume); + return IPC_OK(); +} + +// SpeechTaskParent + +nsresult SpeechTaskParent::DispatchStartImpl(const nsAString& aUri) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnStart(aUri)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchEndImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnEnd(false, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchPauseImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnPause(aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchResumeImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. 
+ return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnResume(aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchErrorImpl(float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnEnd(true, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex, + uint32_t aCharLength, + uint8_t argc) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnBoundary(aName, aElapsedTime, aCharIndex, + aCharLength, argc)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult SpeechTaskParent::DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex) { + if (!mActor) { + // Child is already gone. + return NS_OK; + } + + if (NS_WARN_IF(!(mActor->SendOnMark(aName, aElapsedTime, aCharIndex)))) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h new file mode 100644 index 0000000000..6ae4d38bbc --- /dev/null +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h @@ -0,0 +1,102 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_SpeechSynthesisParent_h +#define mozilla_dom_SpeechSynthesisParent_h + +#include "mozilla/dom/PSpeechSynthesisParent.h" +#include "mozilla/dom/PSpeechSynthesisRequestParent.h" +#include "nsSpeechTask.h" + +namespace mozilla::dom { + +class ContentParent; +class SpeechTaskParent; +class SpeechSynthesisRequestParent; + +class SpeechSynthesisParent : public PSpeechSynthesisParent { + friend class ContentParent; + friend class SpeechSynthesisRequestParent; + friend class PSpeechSynthesisParent; + + public: + void ActorDestroy(ActorDestroyReason aWhy) override; + + bool SendInit(); + + protected: + SpeechSynthesisParent(); + virtual ~SpeechSynthesisParent(); + PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent( + const nsAString& aText, const nsAString& aLang, const nsAString& aUri, + const float& aVolume, const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting); + + bool DeallocPSpeechSynthesisRequestParent( + PSpeechSynthesisRequestParent* aActor); + + mozilla::ipc::IPCResult RecvPSpeechSynthesisRequestConstructor( + PSpeechSynthesisRequestParent* aActor, const nsAString& aText, + const nsAString& aLang, const nsAString& aUri, const float& aVolume, + const float& aRate, const float& aPitch, + const bool& aShouldResistFingerprinting) override; +}; + +class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent { + public: + explicit SpeechSynthesisRequestParent(SpeechTaskParent* aTask); + virtual ~SpeechSynthesisRequestParent(); + + RefPtr<SpeechTaskParent> mTask; + + protected: + void ActorDestroy(ActorDestroyReason aWhy) override; + + mozilla::ipc::IPCResult RecvPause() override; + + mozilla::ipc::IPCResult RecvResume() override; + + mozilla::ipc::IPCResult RecvCancel() override; + + mozilla::ipc::IPCResult RecvForceEnd() override; + + mozilla::ipc::IPCResult RecvSetAudioOutputVolume( + const float& aVolume) override; + + mozilla::ipc::IPCResult Recv__delete__() override; +}; + +class 
SpeechTaskParent : public nsSpeechTask { + friend class SpeechSynthesisRequestParent; + + public: + SpeechTaskParent(float aVolume, const nsAString& aUtterance, + bool aShouldResistFingerprinting) + : nsSpeechTask(aVolume, aUtterance, aShouldResistFingerprinting), + mActor(nullptr) {} + + nsresult DispatchStartImpl(const nsAString& aUri) override; + + nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) override; + + nsresult DispatchBoundaryImpl(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex, uint32_t aCharLength, + uint8_t argc) override; + + nsresult DispatchMarkImpl(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex) override; + + private: + SpeechSynthesisRequestParent* mActor; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/moz.build b/dom/media/webspeech/synth/moz.build new file mode 100644 index 0000000000..dde668668a --- /dev/null +++ b/dom/media/webspeech/synth/moz.build @@ -0,0 +1,65 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +if CONFIG["MOZ_WEBSPEECH"]: + MOCHITEST_MANIFESTS += [ + "test/mochitest.ini", + "test/startup/mochitest.ini", + ] + + XPIDL_MODULE = "dom_webspeechsynth" + + XPIDL_SOURCES += ["nsISpeechService.idl", "nsISynthVoiceRegistry.idl"] + + EXPORTS.mozilla.dom += [ + "ipc/SpeechSynthesisChild.h", + "ipc/SpeechSynthesisParent.h", + "nsSpeechTask.h", + "nsSynthVoiceRegistry.h", + "SpeechSynthesis.h", + "SpeechSynthesisUtterance.h", + "SpeechSynthesisVoice.h", + ] + + UNIFIED_SOURCES += [ + "ipc/SpeechSynthesisChild.cpp", + "ipc/SpeechSynthesisParent.cpp", + "nsSpeechTask.cpp", + "nsSynthVoiceRegistry.cpp", + "SpeechSynthesis.cpp", + "SpeechSynthesisUtterance.cpp", + "SpeechSynthesisVoice.cpp", + ] + + if CONFIG["MOZ_WEBSPEECH_TEST_BACKEND"]: + UNIFIED_SOURCES += ["test/nsFakeSynthServices.cpp"] + + XPCOM_MANIFESTS += [ + "test/components.conf", + ] + + if CONFIG["MOZ_WIDGET_TOOLKIT"] == "windows": + DIRS += ["windows"] + + if CONFIG["MOZ_WIDGET_TOOLKIT"] == "cocoa": + DIRS += ["cocoa"] + + if CONFIG["MOZ_WIDGET_TOOLKIT"] == "android": + DIRS += ["android"] + + if CONFIG["MOZ_SYNTH_SPEECHD"]: + DIRS += ["speechd"] + + IPDL_SOURCES += [ + "ipc/PSpeechSynthesis.ipdl", + "ipc/PSpeechSynthesisRequest.ipdl", + ] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" +LOCAL_INCLUDES += [ + "ipc", +] diff --git a/dom/media/webspeech/synth/nsISpeechService.idl b/dom/media/webspeech/synth/nsISpeechService.idl new file mode 100644 index 0000000000..b69973b6d2 --- /dev/null +++ b/dom/media/webspeech/synth/nsISpeechService.idl @@ -0,0 +1,143 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * A callback is implemented by the service. 
+ */ +[scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)] +interface nsISpeechTaskCallback : nsISupports +{ + /** + * The user or application has paused the speech. + */ + void onPause(); + + /** + * The user or application has resumed the speech. + */ + void onResume(); + + /** + * The user or application has canceled the speech. + */ + void onCancel(); + + /** + * The user or application has changed the volume of this speech. + */ + void onVolumeChanged(in float aVolume); +}; + + +/** + * A task is associated with a single utterance. It is provided by the browser + * to the service in the speak() method. + */ +[scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)] +interface nsISpeechTask : nsISupports +{ + /** + * Prepare browser for speech. + * + * @param aCallback callback object for mid-speech operations. + */ + void setup(in nsISpeechTaskCallback aCallback); + + /** + * Dispatch start event. + */ + void dispatchStart(); + + /** + * Dispatch end event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchEnd(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch pause event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchPause(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch resume event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchResume(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch error event. + * + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchError(in float aElapsedTime, in unsigned long aCharIndex); + + /** + * Dispatch boundary event. 
+ * + * @param aName name of boundary, 'word' or 'sentence' + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + * @param aCharLength length of text in boundary event to be spoken. + */ + [optional_argc] void dispatchBoundary(in AString aName, in float aElapsedTime, + in unsigned long aCharIndex, + [optional] in unsigned long aCharLength); + + /** + * Dispatch mark event. + * + * @param aName mark identifier. + * @param aElapsedTime time in seconds since speech has started. + * @param aCharIndex offset of spoken characters. + */ + void dispatchMark(in AString aName, in float aElapsedTime, in unsigned long aCharIndex); +}; + +/** + * The main interface of a speech synthesis service. + * + * A service is responsible for outputting audio. + * The service dispatches events, starting with dispatchStart() and ending with + * dispatchEnd or dispatchError(). + * A service must also respond with the correct actions and events in response + * to implemented callback methods. + */ +[scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)] +interface nsISpeechService : nsISupports +{ + /** + * Speak the given text using the voice identified by the given uri. See + * W3C Speech API spec for information about pitch and rate. + * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes + * + * @param aText text to utter. + * @param aUri unique voice identifier. + * @param aVolume volume to speak voice in. Only relevant for indirect audio. + * @param aRate rate to speak voice in. + * @param aPitch pitch to speak voice in. + * @param aTask task instance for utterance, used for sending events or audio + * data back to browser. + */ + void speak(in AString aText, in AString aUri, + in float aVolume, in float aRate, in float aPitch, + in nsISpeechTask aTask); +}; + +%{C++ +// This is the service category speech services could use to start up as +// a component. 
+#define NS_SPEECH_SYNTH_STARTED "speech-synth-started" +%} diff --git a/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl b/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl new file mode 100644 index 0000000000..8dd3a0426c --- /dev/null +++ b/dom/media/webspeech/synth/nsISynthVoiceRegistry.idl @@ -0,0 +1,77 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsISpeechService; + +[scriptable, builtinclass, uuid(5d7a0b38-77e5-4ee5-897c-ce5db9b85d44)] +interface nsISynthVoiceRegistry : nsISupports +{ + /** + * Register a speech synthesis voice. + * + * @param aService the service that provides this voice. + * @param aUri a unique identifier for this voice. + * @param aName human-readable name for this voice. + * @param aLang a BCP 47 language tag. + * @param aLocalService true if service does not require network. + * @param aQueuesUtterances true if voice only speaks one utterance at a time + */ + void addVoice(in nsISpeechService aService, in AString aUri, + in AString aName, in AString aLang, + in boolean aLocalService, in boolean aQueuesUtterances); + + /** + * Remove a speech synthesis voice. + * + * @param aService the service that was used to add the voice. + * @param aUri a unique identifier of an existing voice. + */ + void removeVoice(in nsISpeechService aService, in AString aUri); + + /** + * Notify content of voice availability changes. This allows content + * to be notified of voice catalog changes in real time. + */ + void notifyVoicesChanged(); + + /** + * Set a voice as default. + * + * @param aUri a unique identifier of an existing voice. + * @param aIsDefault true if this voice should be toggled as default. 
+ */ + void setDefaultVoice(in AString aUri, in boolean aIsDefault); + + readonly attribute uint32_t voiceCount; + + AString getVoice(in uint32_t aIndex); + + bool isDefaultVoice(in AString aUri); + + bool isLocalVoice(in AString aUri); + + AString getVoiceLang(in AString aUri); + + AString getVoiceName(in AString aUri); +}; + +%{C++ +#define NS_SYNTHVOICEREGISTRY_CID \ + { /* {7090524d-5574-4492-a77f-d8d558ced59d} */ \ + 0x7090524d, \ + 0x5574, \ + 0x4492, \ + { 0xa7, 0x7f, 0xd8, 0xd5, 0x58, 0xce, 0xd5, 0x9d } \ + } + +#define NS_SYNTHVOICEREGISTRY_CONTRACTID \ + "@mozilla.org/synth-voice-registry;1" + +#define NS_SYNTHVOICEREGISTRY_CLASSNAME \ + "Speech Synthesis Voice Registry" + +%} diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp new file mode 100644 index 0000000000..b102172466 --- /dev/null +++ b/dom/media/webspeech/synth/nsSpeechTask.cpp @@ -0,0 +1,389 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "AudioChannelService.h" +#include "AudioSegment.h" +#include "nsSpeechTask.h" +#include "nsSynthVoiceRegistry.h" +#include "nsXULAppAPI.h" +#include "SharedBuffer.h" +#include "SpeechSynthesis.h" + +#undef LOG +extern mozilla::LogModule* GetSpeechSynthLog(); +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +#define AUDIO_TRACK 1 + +namespace mozilla::dom { + +// nsSpeechTask + +NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance, + mCallback) + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask) + NS_INTERFACE_MAP_ENTRY(nsISpeechTask) + NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) + +nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting) + : mUtterance(aUtterance), + mInited(false), + mPrePaused(false), + mPreCanceled(false), + mCallback(nullptr), + mShouldResistFingerprinting(aShouldResistFingerprinting), + mState(STATE_PENDING) { + mText = aUtterance->mText; + mVolume = aUtterance->Volume(); +} + +nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, + bool aShouldResistFingerprinting) + : mUtterance(nullptr), + mVolume(aVolume), + mText(aText), + mInited(false), + mPrePaused(false), + mPreCanceled(false), + mCallback(nullptr), + mShouldResistFingerprinting(aShouldResistFingerprinting), + mState(STATE_PENDING) {} + +nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); } + +void nsSpeechTask::Init() { mInited = true; } + +void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) { + mChosenVoiceURI = aUri; +} + +NS_IMETHODIMP +nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) { + MOZ_ASSERT(XRE_IsParentProcess()); + + LOG(LogLevel::Debug, ("nsSpeechTask::Setup")); + + mCallback = aCallback; 
+ + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchStart() { + nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true); + return DispatchStartImpl(); +} + +nsresult nsSpeechTask::DispatchStartImpl() { + return DispatchStartImpl(mChosenVoiceURI); +} + +nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl")); + + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState != STATE_PENDING)) { + return NS_ERROR_NOT_AVAILABLE; + } + + CreateAudioChannelAgent(); + + mState = STATE_SPEAKING; + mUtterance->mChosenVoiceURI = aUri; + mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) { + // After we end, no callback functions should go through. + mCallback = nullptr; + + if (!mPreCanceled) { + nsSynthVoiceRegistry::GetInstance()->SpeakNext(); + } + + return DispatchEndImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl")); + + DestroyAudioChannelAgent(); + + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState == STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<SpeechSynthesisUtterance> utterance = mUtterance; + + if (mSpeechSynthesis) { + mSpeechSynthesis->OnEnd(this); + } + + mState = STATE_ENDED; + utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) { + return DispatchPauseImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl")); + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mUtterance->mPaused)) { + return NS_ERROR_NOT_AVAILABLE; + } + if (NS_WARN_IF(mState == 
STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->mPaused = true; + if (mState == STATE_SPEAKING) { + mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) { + return DispatchResumeImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl")); + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(!(mUtterance->mPaused))) { + return NS_ERROR_NOT_AVAILABLE; + } + if (NS_WARN_IF(mState == STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + mUtterance->mPaused = false; + if (mState == STATE_SPEAKING) { + mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + } + + return NS_OK; +} + +void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) { + DispatchError(aElapsedTime, aCharIndex); +} + +NS_IMETHODIMP +nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) { + if (!mPreCanceled) { + nsSynthVoiceRegistry::GetInstance()->SpeakNext(); + } + + return DispatchErrorImpl(aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime, + uint32_t aCharIndex) { + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl")); + + DestroyAudioChannelAgent(); + + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState == STATE_ENDED)) { + return NS_ERROR_NOT_AVAILABLE; + } + + if (mSpeechSynthesis) { + mSpeechSynthesis->OnEnd(this); + } + + mState = STATE_ENDED; + mUtterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr, + aElapsedTime, u""_ns); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex, uint32_t aCharLength, + uint8_t argc) { + return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, 
aCharLength, + argc); +} + +nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex, + uint32_t aCharLength, + uint8_t argc) { + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState != STATE_SPEAKING)) { + return NS_ERROR_NOT_AVAILABLE; + } + mUtterance->DispatchSpeechSynthesisEvent( + u"boundary"_ns, aCharIndex, + argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr, + aElapsedTime, aName); + + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex) { + return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); +} + +nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName, + float aElapsedTime, + uint32_t aCharIndex) { + MOZ_ASSERT(mUtterance); + if (NS_WARN_IF(mState != STATE_SPEAKING)) { + return NS_ERROR_NOT_AVAILABLE; + } + mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr, + aElapsedTime, aName); + return NS_OK; +} + +void nsSpeechTask::Pause() { + MOZ_ASSERT(XRE_IsParentProcess()); + + if (mCallback) { + DebugOnly<nsresult> rv = mCallback->OnPause(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); + } + + if (!mInited) { + mPrePaused = true; + } +} + +void nsSpeechTask::Resume() { + MOZ_ASSERT(XRE_IsParentProcess()); + + if (mCallback) { + DebugOnly<nsresult> rv = mCallback->OnResume(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Unable to call onResume() callback"); + } + + if (mPrePaused) { + mPrePaused = false; + nsSynthVoiceRegistry::GetInstance()->ResumeQueue(); + } +} + +void nsSpeechTask::Cancel() { + MOZ_ASSERT(XRE_IsParentProcess()); + + LOG(LogLevel::Debug, ("nsSpeechTask::Cancel")); + + if (mCallback) { + DebugOnly<nsresult> rv = mCallback->OnCancel(); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), + "Unable to call onCancel() callback"); + } + + if (!mInited) { + mPreCanceled = true; + } +} + +void nsSpeechTask::ForceEnd() { + if (!mInited) { + mPreCanceled = 
true; + } + + DispatchEnd(0, 0); +} + +void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) { + mSpeechSynthesis = aSpeechSynthesis; +} + +void nsSpeechTask::CreateAudioChannelAgent() { + if (!mUtterance) { + return; + } + + if (mAudioChannelAgent) { + mAudioChannelAgent->NotifyStoppedPlaying(); + } + + mAudioChannelAgent = new AudioChannelAgent(); + mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwner(), this); + + nsresult rv = mAudioChannelAgent->NotifyStartedPlaying( + AudioChannelService::AudibleState::eAudible); + if (NS_WARN_IF(NS_FAILED(rv))) { + return; + } + + mAudioChannelAgent->PullInitialUpdate(); +} + +void nsSpeechTask::DestroyAudioChannelAgent() { + if (mAudioChannelAgent) { + mAudioChannelAgent->NotifyStoppedPlaying(); + mAudioChannelAgent = nullptr; + } +} + +NS_IMETHODIMP +nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) { + SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume); + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) { + if (!mUtterance) { + return NS_OK; + } + + if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) { + Resume(); + } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED && + !mUtterance->mPaused) { + Pause(); + } + return NS_OK; +} + +NS_IMETHODIMP +nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) { + // This is not supported yet. 
+ return NS_OK; +} + +void nsSpeechTask::SetAudioOutputVolume(float aVolume) { + if (mCallback) { + mCallback->OnVolumeChanged(aVolume); + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h new file mode 100644 index 0000000000..fc121cf8f1 --- /dev/null +++ b/dom/media/webspeech/synth/nsSpeechTask.h @@ -0,0 +1,128 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_nsSpeechTask_h +#define mozilla_dom_nsSpeechTask_h + +#include "SpeechSynthesisUtterance.h" +#include "AudioChannelAgent.h" +#include "nsISpeechService.h" +#include "nsWeakReference.h" + +namespace mozilla { + +class SharedBuffer; + +namespace dom { + +class SpeechSynthesisUtterance; +class SpeechSynthesis; + +class nsSpeechTask : public nsISpeechTask, + public nsIAudioChannelAgentCallback, + public nsSupportsWeakReference { + public: + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSpeechTask, nsISpeechTask) + + NS_DECL_NSISPEECHTASK + NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK + + explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, + bool aShouldResistFingerprinting); + nsSpeechTask(float aVolume, const nsAString& aText, + bool aShouldResistFingerprinting); + + virtual void Pause(); + + virtual void Resume(); + + virtual void Cancel(); + + virtual void ForceEnd(); + + void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis); + + void Init(); + + void SetChosenVoiceURI(const nsAString& aUri); + + virtual void SetAudioOutputVolume(float aVolume); + + void ForceError(float aElapsedTime, uint32_t aCharIndex); + + bool IsPreCanceled() { return mPreCanceled; }; + + bool IsPrePaused() { 
return mPrePaused; } + + bool ShouldResistFingerprinting() { return mShouldResistFingerprinting; } + + enum { STATE_PENDING, STATE_SPEAKING, STATE_ENDED }; + + uint32_t GetState() const { return mState; } + + bool IsSpeaking() const { return mState == STATE_SPEAKING; } + + bool IsPending() const { return mState == STATE_PENDING; } + + protected: + virtual ~nsSpeechTask(); + + nsresult DispatchStartImpl(); + + virtual nsresult DispatchStartImpl(const nsAString& aUri); + + virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex); + + virtual nsresult DispatchBoundaryImpl(const nsAString& aName, + float aElapsedTime, uint32_t aCharIndex, + uint32_t aCharLength, uint8_t argc); + + virtual nsresult DispatchMarkImpl(const nsAString& aName, float aElapsedTime, + uint32_t aCharIndex); + + RefPtr<SpeechSynthesisUtterance> mUtterance; + + float mVolume; + + nsString mText; + + bool mInited; + + bool mPrePaused; + + bool mPreCanceled; + + private: + void End(); + + void CreateAudioChannelAgent(); + + void DestroyAudioChannelAgent(); + + nsCOMPtr<nsISpeechTaskCallback> mCallback; + + RefPtr<mozilla::dom::AudioChannelAgent> mAudioChannelAgent; + + RefPtr<SpeechSynthesis> mSpeechSynthesis; + + nsString mChosenVoiceURI; + + bool mShouldResistFingerprinting; + + uint32_t mState; +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp new file mode 100644 index 0000000000..d289c81655 --- /dev/null +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp @@ -0,0 +1,762 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This 
Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISpeechService.h" +#include "nsServiceManagerUtils.h" +#include "nsCategoryManagerUtils.h" + +#include "SpeechSynthesisUtterance.h" +#include "SpeechSynthesisVoice.h" +#include "nsContentUtils.h" +#include "nsSynthVoiceRegistry.h" +#include "nsSpeechTask.h" +#include "AudioChannelService.h" + +#include "nsString.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/ContentChild.h" +#include "mozilla/dom/ContentParent.h" +#include "mozilla/dom/Document.h" +#include "mozilla/intl/LocaleService.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/Unused.h" + +#include "SpeechSynthesisChild.h" +#include "SpeechSynthesisParent.h" + +using mozilla::intl::LocaleService; + +#undef LOG +extern mozilla::LogModule* GetSpeechSynthLog(); +#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) + +namespace { + +void GetAllSpeechSynthActors( + nsTArray<mozilla::dom::SpeechSynthesisParent*>& aActors) { + MOZ_ASSERT(NS_IsMainThread()); + MOZ_ASSERT(aActors.IsEmpty()); + + AutoTArray<mozilla::dom::ContentParent*, 20> contentActors; + mozilla::dom::ContentParent::GetAll(contentActors); + + for (uint32_t contentIndex = 0; contentIndex < contentActors.Length(); + ++contentIndex) { + MOZ_ASSERT(contentActors[contentIndex]); + + AutoTArray<mozilla::dom::PSpeechSynthesisParent*, 5> speechsynthActors; + contentActors[contentIndex]->ManagedPSpeechSynthesisParent( + speechsynthActors); + + for (uint32_t speechsynthIndex = 0; + speechsynthIndex < speechsynthActors.Length(); ++speechsynthIndex) { + MOZ_ASSERT(speechsynthActors[speechsynthIndex]); + + mozilla::dom::SpeechSynthesisParent* actor = + static_cast<mozilla::dom::SpeechSynthesisParent*>( + speechsynthActors[speechsynthIndex]); + aActors.AppendElement(actor); + } + } 
+} + +} // namespace + +namespace mozilla::dom { + +// VoiceData + +class VoiceData final { + private: + // Private destructor, to discourage deletion outside of Release(): + ~VoiceData() = default; + + public: + VoiceData(nsISpeechService* aService, const nsAString& aUri, + const nsAString& aName, const nsAString& aLang, bool aIsLocal, + bool aQueuesUtterances) + : mService(aService), + mUri(aUri), + mName(aName), + mLang(aLang), + mIsLocal(aIsLocal), + mIsQueued(aQueuesUtterances) {} + + NS_INLINE_DECL_REFCOUNTING(VoiceData) + + nsCOMPtr<nsISpeechService> mService; + + nsString mUri; + + nsString mName; + + nsString mLang; + + bool mIsLocal; + + bool mIsQueued; +}; + +// GlobalQueueItem + +class GlobalQueueItem final { + private: + // Private destructor, to discourage deletion outside of Release(): + ~GlobalQueueItem() = default; + + public: + GlobalQueueItem(VoiceData* aVoice, nsSpeechTask* aTask, + const nsAString& aText, const float& aVolume, + const float& aRate, const float& aPitch) + : mVoice(aVoice), + mTask(aTask), + mText(aText), + mVolume(aVolume), + mRate(aRate), + mPitch(aPitch), + mIsLocal(false) {} + + NS_INLINE_DECL_REFCOUNTING(GlobalQueueItem) + + RefPtr<VoiceData> mVoice; + + RefPtr<nsSpeechTask> mTask; + + nsString mText; + + float mVolume; + + float mRate; + + float mPitch; + + bool mIsLocal; +}; + +// nsSynthVoiceRegistry + +static StaticRefPtr<nsSynthVoiceRegistry> gSynthVoiceRegistry; + +NS_IMPL_ISUPPORTS(nsSynthVoiceRegistry, nsISynthVoiceRegistry) + +nsSynthVoiceRegistry::nsSynthVoiceRegistry() + : mSpeechSynthChild(nullptr), mUseGlobalQueue(false), mIsSpeaking(false) { + if (XRE_IsContentProcess()) { + mSpeechSynthChild = new SpeechSynthesisChild(); + ContentChild::GetSingleton()->SendPSpeechSynthesisConstructor( + mSpeechSynthChild); + } +} + +nsSynthVoiceRegistry::~nsSynthVoiceRegistry() { + LOG(LogLevel::Debug, ("~nsSynthVoiceRegistry")); + + // mSpeechSynthChild's lifecycle is managed by the Content protocol. 
+ mSpeechSynthChild = nullptr; + + mUriVoiceMap.Clear(); +} + +nsSynthVoiceRegistry* nsSynthVoiceRegistry::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + + if (!gSynthVoiceRegistry) { + gSynthVoiceRegistry = new nsSynthVoiceRegistry(); + ClearOnShutdown(&gSynthVoiceRegistry); + if (XRE_IsParentProcess()) { + // Start up all speech synth services. + NS_CreateServicesFromCategory(NS_SPEECH_SYNTH_STARTED, nullptr, + NS_SPEECH_SYNTH_STARTED); + } + } + + return gSynthVoiceRegistry; +} + +already_AddRefed<nsSynthVoiceRegistry> +nsSynthVoiceRegistry::GetInstanceForService() { + RefPtr<nsSynthVoiceRegistry> registry = GetInstance(); + + return registry.forget(); +} + +bool nsSynthVoiceRegistry::SendInitialVoicesAndState( + SpeechSynthesisParent* aParent) { + MOZ_ASSERT(XRE_IsParentProcess()); + + nsTArray<RemoteVoice> voices; + nsTArray<nsString> defaults; + + for (uint32_t i = 0; i < mVoices.Length(); ++i) { + RefPtr<VoiceData> voice = mVoices[i]; + + voices.AppendElement(RemoteVoice(voice->mUri, voice->mName, voice->mLang, + voice->mIsLocal, voice->mIsQueued)); + } + + for (uint32_t i = 0; i < mDefaultVoices.Length(); ++i) { + defaults.AppendElement(mDefaultVoices[i]->mUri); + } + + return aParent->SendInitialVoicesAndState(voices, defaults, IsSpeaking()); +} + +void nsSynthVoiceRegistry::RecvInitialVoicesAndState( + const nsTArray<RemoteVoice>& aVoices, const nsTArray<nsString>& aDefaults, + const bool& aIsSpeaking) { + // We really should have a local instance since this is a directed response to + // an Init() call. 
+ MOZ_ASSERT(gSynthVoiceRegistry); + + for (uint32_t i = 0; i < aVoices.Length(); ++i) { + RemoteVoice voice = aVoices[i]; + gSynthVoiceRegistry->AddVoiceImpl(nullptr, voice.voiceURI(), voice.name(), + voice.lang(), voice.localService(), + voice.queued()); + } + + for (uint32_t i = 0; i < aDefaults.Length(); ++i) { + gSynthVoiceRegistry->SetDefaultVoice(aDefaults[i], true); + } + + gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking; + + if (aVoices.Length()) { + gSynthVoiceRegistry->NotifyVoicesChanged(); + } +} + +void nsSynthVoiceRegistry::RecvRemoveVoice(const nsAString& aUri) { + // If we don't have a local instance of the registry yet, we will receive + // current voices at construction time. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->RemoveVoice(nullptr, aUri); +} + +void nsSynthVoiceRegistry::RecvAddVoice(const RemoteVoice& aVoice) { + // If we don't have a local instance of the registry yet, we will receive + // current voices at construction time. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->AddVoiceImpl(nullptr, aVoice.voiceURI(), aVoice.name(), + aVoice.lang(), aVoice.localService(), + aVoice.queued()); +} + +void nsSynthVoiceRegistry::RecvSetDefaultVoice(const nsAString& aUri, + bool aIsDefault) { + // If we don't have a local instance of the registry yet, we will receive + // current voices at construction time. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->SetDefaultVoice(aUri, aIsDefault); +} + +void nsSynthVoiceRegistry::RecvIsSpeakingChanged(bool aIsSpeaking) { + // If we don't have a local instance of the registry yet, we will get the + // speaking state on construction. + if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking; +} + +void nsSynthVoiceRegistry::RecvNotifyVoicesChanged() { + // If we don't have a local instance of the registry yet, we don't care. 
+ if (!gSynthVoiceRegistry) { + return; + } + + gSynthVoiceRegistry->NotifyVoicesChanged(); +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::AddVoice(nsISpeechService* aService, + const nsAString& aUri, const nsAString& aName, + const nsAString& aLang, bool aLocalService, + bool aQueuesUtterances) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::AddVoice uri='%s' name='%s' lang='%s' local=%s " + "queued=%s", + NS_ConvertUTF16toUTF8(aUri).get(), NS_ConvertUTF16toUTF8(aName).get(), + NS_ConvertUTF16toUTF8(aLang).get(), aLocalService ? "true" : "false", + aQueuesUtterances ? "true" : "false")); + + if (NS_WARN_IF(XRE_IsContentProcess())) { + return NS_ERROR_NOT_AVAILABLE; + } + + return AddVoiceImpl(aService, aUri, aName, aLang, aLocalService, + aQueuesUtterances); +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::RemoveVoice(nsISpeechService* aService, + const nsAString& aUri) { + LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::RemoveVoice uri='%s' (%s)", + NS_ConvertUTF16toUTF8(aUri).get(), + (XRE_IsContentProcess()) ? "child" : "parent")); + + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + if (NS_WARN_IF(!(aService == retval->mService))) { + return NS_ERROR_INVALID_ARG; + } + + mVoices.RemoveElement(retval); + mDefaultVoices.RemoveElement(retval); + mUriVoiceMap.Remove(aUri); + + if (retval->mIsQueued && + !StaticPrefs::media_webspeech_synth_force_global_queue()) { + // Check if this is the last queued voice, and disable the global queue if + // it is. 
+ bool queued = false; + for (uint32_t i = 0; i < mVoices.Length(); i++) { + VoiceData* voice = mVoices[i]; + if (voice->mIsQueued) { + queued = true; + break; + } + } + if (!queued) { + mUseGlobalQueue = false; + } + } + + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) + Unused << ssplist[i]->SendVoiceRemoved(aUri); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::NotifyVoicesChanged() { + if (XRE_IsParentProcess()) { + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) + Unused << ssplist[i]->SendNotifyVoicesChanged(); + } + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (NS_WARN_IF(!(obs))) { + return NS_ERROR_NOT_AVAILABLE; + } + + obs->NotifyObservers(nullptr, "synth-voices-changed", nullptr); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::SetDefaultVoice(const nsAString& aUri, bool aIsDefault) { + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + mDefaultVoices.RemoveElement(retval); + + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SetDefaultVoice %s %s", + NS_ConvertUTF16toUTF8(aUri).get(), aIsDefault ? 
"true" : "false")); + + if (aIsDefault) { + mDefaultVoices.AppendElement(retval); + } + + if (XRE_IsParentProcess()) { + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendSetDefaultVoice(aUri, aIsDefault); + } + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceCount(uint32_t* aRetval) { + *aRetval = mVoices.Length(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoice(uint32_t aIndex, nsAString& aRetval) { + if (NS_WARN_IF(!(aIndex < mVoices.Length()))) { + return NS_ERROR_INVALID_ARG; + } + + aRetval = mVoices[aIndex]->mUri; + + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::IsDefaultVoice(const nsAString& aUri, bool* aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + for (int32_t i = mDefaultVoices.Length(); i > 0;) { + VoiceData* defaultVoice = mDefaultVoices[--i]; + + if (voice->mLang.Equals(defaultVoice->mLang)) { + *aRetval = voice == defaultVoice; + return NS_OK; + } + } + + *aRetval = false; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::IsLocalVoice(const nsAString& aUri, bool* aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + *aRetval = voice->mIsLocal; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceLang(const nsAString& aUri, nsAString& aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + aRetval = voice->mLang; + return NS_OK; +} + +NS_IMETHODIMP +nsSynthVoiceRegistry::GetVoiceName(const nsAString& aUri, nsAString& aRetval) { + bool found; + VoiceData* voice = mUriVoiceMap.GetWeak(aUri, &found); + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + 
aRetval = voice->mName; + return NS_OK; +} + +nsresult nsSynthVoiceRegistry::AddVoiceImpl( + nsISpeechService* aService, const nsAString& aUri, const nsAString& aName, + const nsAString& aLang, bool aLocalService, bool aQueuesUtterances) { + const bool found = mUriVoiceMap.Contains(aUri); + if (NS_WARN_IF(found)) { + return NS_ERROR_INVALID_ARG; + } + + RefPtr<VoiceData> voice = new VoiceData(aService, aUri, aName, aLang, + aLocalService, aQueuesUtterances); + + mVoices.AppendElement(voice); + mUriVoiceMap.InsertOrUpdate(aUri, std::move(voice)); + mUseGlobalQueue |= aQueuesUtterances; + + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + + if (!ssplist.IsEmpty()) { + mozilla::dom::RemoteVoice ssvoice(nsString(aUri), nsString(aName), + nsString(aLang), aLocalService, + aQueuesUtterances); + + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendVoiceAdded(ssvoice); + } + } + + return NS_OK; +} + +bool nsSynthVoiceRegistry::FindVoiceByLang(const nsAString& aLang, + VoiceData** aRetval) { + nsAString::const_iterator dashPos, start, end; + aLang.BeginReading(start); + aLang.EndReading(end); + + while (true) { + nsAutoString langPrefix(Substring(start, end)); + + for (int32_t i = mDefaultVoices.Length(); i > 0;) { + VoiceData* voice = mDefaultVoices[--i]; + + if (StringBeginsWith(voice->mLang, langPrefix)) { + *aRetval = voice; + return true; + } + } + + for (int32_t i = mVoices.Length(); i > 0;) { + VoiceData* voice = mVoices[--i]; + + if (StringBeginsWith(voice->mLang, langPrefix)) { + *aRetval = voice; + return true; + } + } + + dashPos = end; + end = start; + + if (!RFindInReadable(u"-"_ns, end, dashPos)) { + break; + } + } + + return false; +} + +VoiceData* nsSynthVoiceRegistry::FindBestMatch(const nsAString& aUri, + const nsAString& aLang) { + if (mVoices.IsEmpty()) { + return nullptr; + } + + bool found = false; + VoiceData* retval = mUriVoiceMap.GetWeak(aUri, &found); + + if (found) { + LOG(LogLevel::Debug, 
("nsSynthVoiceRegistry::FindBestMatch - Matched URI")); + return retval; + } + + // Try finding a match for given voice. + if (!aLang.IsVoid() && !aLang.IsEmpty()) { + if (FindVoiceByLang(aLang, &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched language (%s ~= %s)", + NS_ConvertUTF16toUTF8(aLang).get(), + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + } + + // Try UI language. + nsAutoCString uiLang; + LocaleService::GetInstance()->GetAppLocaleAsBCP47(uiLang); + + if (FindVoiceByLang(NS_ConvertASCIItoUTF16(uiLang), &retval)) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::FindBestMatch - Matched UI language (%s ~= %s)", + uiLang.get(), NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + + // Try en-US, the language of locale "C" + if (FindVoiceByLang(u"en-US"_ns, &retval)) { + LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::FindBestMatch - Matched C " + "locale language (en-US ~= %s)", + NS_ConvertUTF16toUTF8(retval->mLang).get())); + + return retval; + } + + // The top default voice is better than nothing... + if (!mDefaultVoices.IsEmpty()) { + return mDefaultVoices.LastElement(); + } + + return nullptr; +} + +already_AddRefed<nsSpeechTask> nsSynthVoiceRegistry::SpeakUtterance( + SpeechSynthesisUtterance& aUtterance, const nsAString& aDocLang) { + nsString lang = + nsString(aUtterance.mLang.IsEmpty() ? aDocLang : aUtterance.mLang); + nsAutoString uri; + + if (aUtterance.mVoice) { + aUtterance.mVoice->GetVoiceURI(uri); + } + + // Get current audio volume to apply speech call + float volume = aUtterance.Volume(); + RefPtr<AudioChannelService> service = AudioChannelService::GetOrCreate(); + if (service) { + if (nsCOMPtr<nsPIDOMWindowInner> topWindow = aUtterance.GetOwner()) { + // TODO : use audio channel agent, open new bug to fix it. + AudioPlaybackConfig config = + service->GetMediaConfig(topWindow->GetOuterWindow()); + volume = config.mMuted ? 
0.0f : config.mVolume * volume; + } + } + + RefPtr<nsSpeechTask> task; + if (XRE_IsContentProcess()) { + task = new SpeechTaskChild(&aUtterance, + aUtterance.ShouldResistFingerprinting()); + SpeechSynthesisRequestChild* actor = new SpeechSynthesisRequestChild( + static_cast<SpeechTaskChild*>(task.get())); + mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor( + actor, aUtterance.mText, lang, uri, volume, aUtterance.Rate(), + aUtterance.Pitch(), aUtterance.ShouldResistFingerprinting()); + } else { + task = + new nsSpeechTask(&aUtterance, aUtterance.ShouldResistFingerprinting()); + Speak(aUtterance.mText, lang, uri, volume, aUtterance.Rate(), + aUtterance.Pitch(), task); + } + + return task.forget(); +} + +void nsSynthVoiceRegistry::Speak(const nsAString& aText, const nsAString& aLang, + const nsAString& aUri, const float& aVolume, + const float& aRate, const float& aPitch, + nsSpeechTask* aTask) { + MOZ_ASSERT(XRE_IsParentProcess()); + + if (aTask->ShouldResistFingerprinting()) { + aTask->ForceError(0, 0); + return; + } + + VoiceData* voice = FindBestMatch(aUri, aLang); + + if (!voice) { + NS_WARNING("No voices found."); + aTask->ForceError(0, 0); + return; + } + + aTask->SetChosenVoiceURI(voice->mUri); + + if (mUseGlobalQueue || + StaticPrefs::media_webspeech_synth_force_global_queue()) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' " + "rate=%f pitch=%f", + NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(), + NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch)); + RefPtr<GlobalQueueItem> item = + new GlobalQueueItem(voice, aTask, aText, aVolume, aRate, aPitch); + mGlobalQueue.AppendElement(item); + + if (mGlobalQueue.Length() == 1) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } + } else { + SpeakImpl(voice, aTask, aText, aVolume, aRate, aPitch); + } +} + +void nsSynthVoiceRegistry::SpeakNext() { + MOZ_ASSERT(XRE_IsParentProcess()); + + 
LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SpeakNext %d", mGlobalQueue.IsEmpty())); + + SetIsSpeaking(false); + + if (mGlobalQueue.IsEmpty()) { + return; + } + + mGlobalQueue.RemoveElementAt(0); + + while (!mGlobalQueue.IsEmpty()) { + RefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0); + if (item->mTask->IsPreCanceled()) { + mGlobalQueue.RemoveElementAt(0); + continue; + } + if (!item->mTask->IsPrePaused()) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } + break; + } +} + +void nsSynthVoiceRegistry::ResumeQueue() { + MOZ_ASSERT(XRE_IsParentProcess()); + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::ResumeQueue %d", mGlobalQueue.IsEmpty())); + + if (mGlobalQueue.IsEmpty()) { + return; + } + + RefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0); + if (!item->mTask->IsPrePaused()) { + SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, + item->mRate, item->mPitch); + } +} + +bool nsSynthVoiceRegistry::IsSpeaking() { return mIsSpeaking; } + +void nsSynthVoiceRegistry::SetIsSpeaking(bool aIsSpeaking) { + MOZ_ASSERT(XRE_IsParentProcess()); + + // Only set to 'true' if global queue is enabled. 
+ mIsSpeaking = + aIsSpeaking && (mUseGlobalQueue || + StaticPrefs::media_webspeech_synth_force_global_queue()); + + nsTArray<SpeechSynthesisParent*> ssplist; + GetAllSpeechSynthActors(ssplist); + for (uint32_t i = 0; i < ssplist.Length(); ++i) { + Unused << ssplist[i]->SendIsSpeakingChanged(aIsSpeaking); + } +} + +void nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice, nsSpeechTask* aTask, + const nsAString& aText, + const float& aVolume, const float& aRate, + const float& aPitch) { + LOG(LogLevel::Debug, + ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f " + "pitch=%f", + NS_ConvertUTF16toUTF8(aText).get(), + NS_ConvertUTF16toUTF8(aVoice->mUri).get(), aRate, aPitch)); + + aTask->Init(); + + if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate, + aPitch, aTask))) { + aTask->DispatchError(0, 0); + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h new file mode 100644 index 0000000000..85c67c087f --- /dev/null +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h @@ -0,0 +1,99 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_dom_nsSynthVoiceRegistry_h +#define mozilla_dom_nsSynthVoiceRegistry_h + +#include "nsISynthVoiceRegistry.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" + +class nsISpeechService; + +namespace mozilla::dom { + +class RemoteVoice; +class SpeechSynthesisUtterance; +class SpeechSynthesisChild; +class SpeechSynthesisParent; +class nsSpeechTask; +class VoiceData; +class GlobalQueueItem; + +class nsSynthVoiceRegistry final : public nsISynthVoiceRegistry { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISYNTHVOICEREGISTRY + + nsSynthVoiceRegistry(); + + already_AddRefed<nsSpeechTask> SpeakUtterance( + SpeechSynthesisUtterance& aUtterance, const nsAString& aDocLang); + + void Speak(const nsAString& aText, const nsAString& aLang, + const nsAString& aUri, const float& aVolume, const float& aRate, + const float& aPitch, nsSpeechTask* aTask); + + bool SendInitialVoicesAndState(SpeechSynthesisParent* aParent); + + void SpeakNext(); + + void ResumeQueue(); + + bool IsSpeaking(); + + void SetIsSpeaking(bool aIsSpeaking); + + static nsSynthVoiceRegistry* GetInstance(); + + static already_AddRefed<nsSynthVoiceRegistry> GetInstanceForService(); + + static void RecvInitialVoicesAndState(const nsTArray<RemoteVoice>& aVoices, + const nsTArray<nsString>& aDefaults, + const bool& aIsSpeaking); + + static void RecvRemoveVoice(const nsAString& aUri); + + static void RecvAddVoice(const RemoteVoice& aVoice); + + static void RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault); + + static void RecvIsSpeakingChanged(bool aIsSpeaking); + + static void RecvNotifyVoicesChanged(); + + private: + virtual ~nsSynthVoiceRegistry(); + + VoiceData* FindBestMatch(const nsAString& aUri, const nsAString& lang); + + bool FindVoiceByLang(const nsAString& aLang, VoiceData** aRetval); + + nsresult AddVoiceImpl(nsISpeechService* aService, const nsAString& aUri, + const nsAString& aName, const nsAString& aLang, + bool aLocalService, bool aQueuesUtterances); + + void 
SpeakImpl(VoiceData* aVoice, nsSpeechTask* aTask, const nsAString& aText, + const float& aVolume, const float& aRate, const float& aPitch); + + nsTArray<RefPtr<VoiceData>> mVoices; + + nsTArray<RefPtr<VoiceData>> mDefaultVoices; + + nsRefPtrHashtable<nsStringHashKey, VoiceData> mUriVoiceMap; + + SpeechSynthesisChild* mSpeechSynthChild; + + bool mUseGlobalQueue; + + nsTArray<RefPtr<GlobalQueueItem>> mGlobalQueue; + + bool mIsSpeaking; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp new file mode 100644 index 0000000000..e0d5488748 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp @@ -0,0 +1,538 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SpeechDispatcherService.h" + +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_media.h" +#include "nsEscape.h" +#include "nsISupports.h" +#include "nsPrintfCString.h" +#include "nsReadableUtils.h" +#include "nsServiceManagerUtils.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "prlink.h" + +#include <math.h> +#include <stdlib.h> + +#define URI_PREFIX "urn:moz-tts:speechd:" + +#define MAX_RATE static_cast<float>(2.5) +#define MIN_RATE static_cast<float>(0.5) + +// Some structures for libspeechd +typedef enum { + SPD_EVENT_BEGIN, + SPD_EVENT_END, + SPD_EVENT_INDEX_MARK, + SPD_EVENT_CANCEL, + SPD_EVENT_PAUSE, + SPD_EVENT_RESUME +} SPDNotificationType; + +typedef enum { + SPD_BEGIN = 1, + SPD_END = 2, + SPD_INDEX_MARKS = 4, + SPD_CANCEL = 8, + SPD_PAUSE = 16, + SPD_RESUME = 32, + + SPD_ALL = 0x3f +} SPDNotification; + +typedef enum { SPD_MODE_SINGLE = 0, SPD_MODE_THREADED = 1 } SPDConnectionMode; + +typedef void (*SPDCallback)(size_t msg_id, size_t client_id, + SPDNotificationType state); + +typedef void (*SPDCallbackIM)(size_t msg_id, size_t client_id, + SPDNotificationType state, char* index_mark); + +struct SPDConnection { + SPDCallback callback_begin; + SPDCallback callback_end; + SPDCallback callback_cancel; + SPDCallback callback_pause; + SPDCallback callback_resume; + SPDCallbackIM callback_im; + + /* partial, more private fields in structure */ +}; + +struct SPDVoice { + char* name; + char* language; + char* variant; +}; + +typedef enum { + SPD_IMPORTANT = 1, + SPD_MESSAGE = 2, + SPD_TEXT = 3, + SPD_NOTIFICATION = 4, + SPD_PROGRESS = 5 +} SPDPriority; + +#define SPEECHD_FUNCTIONS \ + FUNC(spd_open, SPDConnection*, \ + (const char*, const char*, const char*, SPDConnectionMode)) \ + FUNC(spd_close, void, (SPDConnection*)) \ + FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \ + 
FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \ + FUNC(spd_cancel, int, (SPDConnection*)) \ + FUNC(spd_set_volume, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \ + FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \ + FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \ + FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification)) + +#define FUNC(name, type, params) \ + typedef type(*_##name##_fn) params; \ + static _##name##_fn _##name; + +SPEECHD_FUNCTIONS + +#undef FUNC + +#define spd_open _spd_open +#define spd_close _spd_close +#define spd_list_synthesis_voices _spd_list_synthesis_voices +#define spd_say _spd_say +#define spd_cancel _spd_cancel +#define spd_set_volume _spd_set_volume +#define spd_set_voice_rate _spd_set_voice_rate +#define spd_set_voice_pitch _spd_set_voice_pitch +#define spd_set_synthesis_voice _spd_set_synthesis_voice +#define spd_set_notification_on _spd_set_notification_on + +static PRLibrary* speechdLib = nullptr; + +typedef void (*nsSpeechDispatcherFunc)(); +struct nsSpeechDispatcherDynamicFunction { + const char* functionName; + nsSpeechDispatcherFunc* function; +}; + +namespace mozilla::dom { + +StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton; + +class SpeechDispatcherVoice { + public: + SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage) + : mName(aName), mLanguage(aLanguage) {} + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice) + + // Voice name + nsString mName; + + // Voice language, in BCP-47 syntax + nsString mLanguage; + + private: + ~SpeechDispatcherVoice() = default; +}; + +class SpeechDispatcherCallback final : public nsISpeechTaskCallback { + public: + SpeechDispatcherCallback(nsISpeechTask* aTask, + SpeechDispatcherService* aService) + : mTask(aTask), mService(aService) {} + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, 
+ nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + bool OnSpeechEvent(SPDNotificationType state); + + private: + ~SpeechDispatcherCallback() = default; + + // This pointer is used to dispatch events + nsCOMPtr<nsISpeechTask> mTask; + + // By holding a strong reference to the service we guarantee that it won't be + // destroyed before this runnable. + RefPtr<SpeechDispatcherService> mService; + + TimeStamp mStartTime; +}; + +NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback) + +NS_IMETHODIMP +SpeechDispatcherCallback::OnPause() { + // XXX: Speech dispatcher does not pause immediately, but waits for the speech + // to reach an index mark so that it could resume from that offset. + // There is no support for word or sentence boundaries, so index marks would + // only occur in explicit SSML marks, and we don't support that yet. + // What in actuality happens, is that if you call spd_pause(), it will speak + // the utterance in its entirety, dispatch an end event, and then put speechd + // in a 'paused' state. Since it is after the utterance ended, we don't get + // that state change, and our speech api is in an unrecoverable state. + // So, since it is useless anyway, I am not implementing pause. + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnResume() { + // XXX: Unsupported, see OnPause(). 
+ return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnCancel() { + if (spd_cancel(mService->mSpeechdClient) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +NS_IMETHODIMP +SpeechDispatcherCallback::OnVolumeChanged(float aVolume) { + // XXX: This currently does not change the volume mid-utterance, but it + // doesn't do anything bad either. So we could put this here with the hopes + // that speechd supports this in the future. + if (spd_set_volume(mService->mSpeechdClient, + static_cast<int>(aVolume * 100)) < 0) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +bool SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) { + bool remove = false; + + switch (state) { + case SPD_EVENT_BEGIN: + mStartTime = TimeStamp::Now(); + mTask->DispatchStart(); + break; + + case SPD_EVENT_PAUSE: + mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_RESUME: + mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + break; + + case SPD_EVENT_CANCEL: + case SPD_EVENT_END: + mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0); + remove = true; + break; + + case SPD_EVENT_INDEX_MARK: + // Not yet supported + break; + + default: + break; + } + + return remove; +} + +static void speechd_cb(size_t msg_id, size_t client_id, + SPDNotificationType state) { + SpeechDispatcherService* service = + SpeechDispatcherService::GetInstance(false); + + if (service) { + NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>( + "dom::SpeechDispatcherService::EventNotify", service, + &SpeechDispatcherService::EventNotify, static_cast<uint32_t>(msg_id), + state)); + } +} + +NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SpeechDispatcherService) +NS_IMPL_RELEASE(SpeechDispatcherService) + 
+SpeechDispatcherService::SpeechDispatcherService() + : mInitialized(false), mSpeechdClient(nullptr) {} + +void SpeechDispatcherService::Init() { + if (!StaticPrefs::media_webspeech_synth_enabled() || + Preferences::GetBool("media.webspeech.synth.test")) { + return; + } + + // While speech dispatcher has a "threaded" mode, only spd_say() is async. + // Since synchronous socket i/o could impact startup time, we do + // initialization in a separate thread. + DebugOnly<nsresult> rv = + NS_NewNamedThread("speechd init", getter_AddRefs(mInitThread)); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + rv = mInitThread->Dispatch( + NewRunnableMethod("dom::SpeechDispatcherService::Setup", this, + &SpeechDispatcherService::Setup), + NS_DISPATCH_NORMAL); + MOZ_ASSERT(NS_SUCCEEDED(rv)); +} + +SpeechDispatcherService::~SpeechDispatcherService() { + if (mInitThread) { + mInitThread->Shutdown(); + } + + if (mSpeechdClient) { + spd_close(mSpeechdClient); + } +} + +void SpeechDispatcherService::Setup() { +#define FUNC(name, type, params) {#name, (nsSpeechDispatcherFunc*)&_##name}, + static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = { + SPEECHD_FUNCTIONS}; +#undef FUNC + + MOZ_ASSERT(!mInitialized); + + speechdLib = PR_LoadLibrary("libspeechd.so.2"); + + if (!speechdLib) { + NS_WARNING("Failed to load speechd library"); + return; + } + + if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) { + // There is no version getter function, so we rely on a symbol that was + // introduced in release 0.8.2 in order to check for ABI compatibility. 
+ NS_WARNING("Unsupported version of speechd detected"); + return; + } + + for (uint32_t i = 0; i < ArrayLength(kSpeechDispatcherSymbols); i++) { + *kSpeechDispatcherSymbols[i].function = PR_FindFunctionSymbol( + speechdLib, kSpeechDispatcherSymbols[i].functionName); + + if (!*kSpeechDispatcherSymbols[i].function) { + NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'", + kSpeechDispatcherSymbols[i].functionName) + .get()); + return; + } + } + + mSpeechdClient = + spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED); + if (!mSpeechdClient) { + NS_WARNING("Failed to call spd_open"); + return; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient); + + mSpeechdClient->callback_begin = speechd_cb; + mSpeechdClient->callback_end = speechd_cb; + mSpeechdClient->callback_cancel = speechd_cb; + mSpeechdClient->callback_pause = speechd_cb; + mSpeechdClient->callback_resume = speechd_cb; + + spd_set_notification_on(mSpeechdClient, SPD_BEGIN); + spd_set_notification_on(mSpeechdClient, SPD_END); + spd_set_notification_on(mSpeechdClient, SPD_CANCEL); + + if (list != NULL) { + for (int i = 0; list[i]; i++) { + nsAutoString uri; + + uri.AssignLiteral(URI_PREFIX); + nsAutoCString name; + NS_EscapeURL(list[i]->name, -1, + esc_OnlyNonASCII | esc_Spaces | esc_AlwaysCopy, name); + uri.Append(NS_ConvertUTF8toUTF16(name)); + + uri.AppendLiteral("?"); + + nsAutoCString lang(list[i]->language); + + uri.Append(NS_ConvertUTF8toUTF16(lang)); + + mVoices.InsertOrUpdate(uri, MakeRefPtr<SpeechDispatcherVoice>( + NS_ConvertUTF8toUTF16(list[i]->name), + NS_ConvertUTF8toUTF16(lang))); + } + } + + NS_DispatchToMainThread( + NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices", this, + &SpeechDispatcherService::RegisterVoices)); + + // mInitialized = true; +} + +// private methods + +void SpeechDispatcherService::RegisterVoices() { + RefPtr<nsSynthVoiceRegistry> registry = 
nsSynthVoiceRegistry::GetInstance(); + for (const auto& entry : mVoices) { + const RefPtr<SpeechDispatcherVoice>& voice = entry.GetData(); + + // This service can only speak one utterance at a time, so we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. + DebugOnly<nsresult> rv = + registry->AddVoice(this, entry.GetKey(), voice->mName, voice->mLanguage, + voice->mName.EqualsLiteral("default"), true); + + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); + } + + mInitThread->Shutdown(); + mInitThread = nullptr; + + mInitialized = true; + + registry->NotifyVoicesChanged(); +} + +// nsIObserver + +NS_IMETHODIMP +SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + return NS_OK; +} + +// nsISpeechService + +// TODO: Support SSML +NS_IMETHODIMP +SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, + float aVolume, float aRate, float aPitch, + nsISpeechTask* aTask) { + if (NS_WARN_IF(!mInitialized)) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<SpeechDispatcherCallback> callback = + new SpeechDispatcherCallback(aTask, this); + + bool found = false; + SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found); + + if (NS_WARN_IF(!(found))) { + return NS_ERROR_NOT_AVAILABLE; + } + + spd_set_synthesis_voice(mSpeechdClient, + NS_ConvertUTF16toUTF8(voice->mName).get()); + + // We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100. + spd_set_volume(mSpeechdClient, static_cast<int>(aVolume * 100)); + + // aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate. + // speechd expects -100 to 100 with 0 being normal rate. + float rate = 0; + if (aRate > 1) { + // Each step to 100 is logarithmically distributed up to 2.5x. + rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100; + } else if (aRate < 1) { + // Each step to -100 is logarithmically distributed down to 0.5x. 
+ rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100; + } + + spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate)); + + // We provide a pitch of 0 to 2 with 1 being the default. + // speech-dispatcher expects -100 to 100 with 0 being default. + spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100)); + + nsresult rv = aTask->Setup(callback); + + if (NS_FAILED(rv)) { + return rv; + } + + if (aText.Length()) { + int msg_id = spd_say(mSpeechdClient, SPD_MESSAGE, + NS_ConvertUTF16toUTF8(aText).get()); + + if (msg_id < 0) { + return NS_ERROR_FAILURE; + } + + mCallbacks.InsertOrUpdate(msg_id, std::move(callback)); + } else { + // Speech dispatcher does not work well with empty strings. + // In that case, don't send empty string to speechd, + // and just emulate a speechd start and end event. + NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>( + "dom::SpeechDispatcherCallback::OnSpeechEvent", callback, + &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN)); + + NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>( + "dom::SpeechDispatcherCallback::OnSpeechEvent", callback, + &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END)); + } + + return NS_OK; +} + +SpeechDispatcherService* SpeechDispatcherService::GetInstance(bool create) { + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT( + false, + "SpeechDispatcherService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton && create) { + sSingleton = new SpeechDispatcherService(); + sSingleton->Init(); + ClearOnShutdown(&sSingleton); + } + + return sSingleton; +} + +already_AddRefed<SpeechDispatcherService> +SpeechDispatcherService::GetInstanceForService() { + MOZ_ASSERT(NS_IsMainThread()); + RefPtr<SpeechDispatcherService> sapiService = GetInstance(); + return sapiService.forget(); +} + +void SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) { + SpeechDispatcherCallback* 
callback = mCallbacks.GetWeak(aMsgId); + + if (callback) { + if (callback->OnSpeechEvent((SPDNotificationType)aState)) { + mCallbacks.Remove(aMsgId); + } + } +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h new file mode 100644 index 0000000000..2922053c80 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SpeechDispatcherService_h +#define mozilla_dom_SpeechDispatcherService_h + +#include "mozilla/StaticPtr.h" +#include "nsIObserver.h" +#include "nsISpeechService.h" +#include "nsIThread.h" +#include "nsRefPtrHashtable.h" +#include "nsTArray.h" + +struct SPDConnection; + +namespace mozilla { +namespace dom { + +class SpeechDispatcherCallback; +class SpeechDispatcherVoice; + +class SpeechDispatcherService final : public nsIObserver, + public nsISpeechService { + friend class SpeechDispatcherCallback; + + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIOBSERVER + NS_DECL_NSISPEECHSERVICE + + SpeechDispatcherService(); + + void Init(); + + void Setup(); + + void EventNotify(uint32_t aMsgId, uint32_t aState); + + static SpeechDispatcherService* GetInstance(bool create = true); + static already_AddRefed<SpeechDispatcherService> GetInstanceForService(); + + static StaticRefPtr<SpeechDispatcherService> sSingleton; + + private: + virtual ~SpeechDispatcherService(); + + void RegisterVoices(); + + bool mInitialized; + + SPDConnection* mSpeechdClient; + + nsRefPtrHashtable<nsUint32HashKey, SpeechDispatcherCallback> mCallbacks; + + nsCOMPtr<nsIThread> 
mInitThread; + + nsRefPtrHashtable<nsStringHashKey, SpeechDispatcherVoice> mVoices; +}; + +} // namespace dom +} // namespace mozilla +#endif diff --git a/dom/media/webspeech/synth/speechd/components.conf b/dom/media/webspeech/synth/speechd/components.conf new file mode 100644 index 0000000000..56b01ba5cb --- /dev/null +++ b/dom/media/webspeech/synth/speechd/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{8817b1cf-5ada-43bf-bd73-607657703d0d}', + 'contract_ids': ['@mozilla.org/synthspeechdispatcher;1'], + 'singleton': True, + 'type': 'mozilla::dom::SpeechDispatcherService', + 'headers': ['/dom/media/webspeech/synth/speechd/SpeechDispatcherService.h'], + 'constructor': 'mozilla::dom::SpeechDispatcherService::GetInstanceForService', + 'categories': {"speech-synth-started": 'SpeechDispatcher Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/speechd/moz.build b/dom/media/webspeech/synth/speechd/moz.build new file mode 100644 index 0000000000..0d9632a488 --- /dev/null +++ b/dom/media/webspeech/synth/speechd/moz.build @@ -0,0 +1,15 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +UNIFIED_SOURCES += ["SpeechDispatcherService.cpp"] + +XPCOM_MANIFESTS += [ + "components.conf", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" diff --git a/dom/media/webspeech/synth/test/common.js b/dom/media/webspeech/synth/test/common.js new file mode 100644 index 0000000000..c22b0b488c --- /dev/null +++ b/dom/media/webspeech/synth/test/common.js @@ -0,0 +1,104 @@ +function synthTestQueue(aTestArgs, aEndFunc) { + var utterances = []; + for (var i in aTestArgs) { + var uargs = aTestArgs[i][0]; + var win = uargs.win || window; + var u = new win.SpeechSynthesisUtterance(uargs.text); + + if (uargs.args) { + for (var attr in uargs.args) { + u[attr] = uargs.args[attr]; + } + } + + function onend_handler(e) { + is(e.target, utterances.shift(), "Target matches utterances"); + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking."); + + if (utterances.length) { + ok(speechSynthesis.pending, "other utterances queued"); + } else { + ok(!speechSynthesis.pending, "queue is empty, nothing pending."); + if (aEndFunc) { + aEndFunc(); + } + } + } + + u.addEventListener( + "start", + (function (expectedUri) { + return function (e) { + if (expectedUri) { + var chosenVoice = SpecialPowers.wrap(e).target.chosenVoiceURI; + is(chosenVoice, expectedUri, "Incorrect URI is used"); + } + }; + })(aTestArgs[i][1] ? aTestArgs[i][1].uri : null) + ); + + u.addEventListener("end", onend_handler); + u.addEventListener("error", onend_handler); + + u.addEventListener( + "error", + (function (expectedError) { + return function onerror_handler(e) { + ok( + expectedError, + "Error in speech utterance '" + e.target.text + "'" + ); + }; + })(aTestArgs[i][1] ? 
aTestArgs[i][1].err : false) + ); + + utterances.push(u); + win.speechSynthesis.speak(u); + } + + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet."); + ok(speechSynthesis.pending, "speechSynthesis has an utterance queued."); +} + +function loadFrame(frameId) { + return new Promise(function (resolve, reject) { + var frame = document.getElementById(frameId); + frame.addEventListener("load", function (e) { + frame.contentWindow.document.title = frameId; + resolve(frame); + }); + frame.src = "about:blank"; + }); +} + +function waitForVoices(win) { + return new Promise(resolve => { + function resolver() { + if (win.speechSynthesis.getVoices().length) { + win.speechSynthesis.removeEventListener("voiceschanged", resolver); + resolve(); + } + } + + win.speechSynthesis.addEventListener("voiceschanged", resolver); + resolver(); + }); +} + +function loadSpeechTest(fileName, prefs, frameId = "testFrame") { + loadFrame(frameId).then(frame => { + waitForVoices(frame.contentWindow).then( + () => (document.getElementById("testFrame").src = fileName) + ); + }); +} + +function testSynthState(win, expectedState) { + for (var attr in expectedState) { + is( + win.speechSynthesis[attr], + expectedState[attr], + win.document.title + ": '" + attr + '" does not match' + ); + } +} diff --git a/dom/media/webspeech/synth/test/components.conf b/dom/media/webspeech/synth/test/components.conf new file mode 100644 index 0000000000..f37e4eafae --- /dev/null +++ b/dom/media/webspeech/synth/test/components.conf @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +Classes = [ + { + 'cid': '{e7d52d9e-c148-47d8-ab2a-95d7f40ea53d}', + 'contract_ids': ['@mozilla.org/fakesynth;1'], + 'singleton': True, + 'type': 'mozilla::dom::nsFakeSynthServices', + 'headers': ['/dom/media/webspeech/synth/test/nsFakeSynthServices.h'], + 'constructor': 'mozilla::dom::nsFakeSynthServices::GetInstanceForService', + 'categories': {'speech-synth-started': 'Fake Speech Synth'}, + }, +] diff --git a/dom/media/webspeech/synth/test/file_bfcache_page1.html b/dom/media/webspeech/synth/test/file_bfcache_page1.html new file mode 100644 index 0000000000..d6229eeeda --- /dev/null +++ b/dom/media/webspeech/synth/test/file_bfcache_page1.html @@ -0,0 +1,18 @@ +<!DOCTYPE HTML> +<html> +<head> + <meta charset="utf-8"> + <script type="application/javascript"> + addEventListener('pageshow', function onshow(evt) { + var u = new SpeechSynthesisUtterance('hello'); + u.lang = 'it-IT-noend'; + u.addEventListener('start', function() { + location = "file_bfcache_page2.html"; + }); + speechSynthesis.speak(u); + }); + </script> +</head> +<body> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_bfcache_page2.html b/dom/media/webspeech/synth/test/file_bfcache_page2.html new file mode 100644 index 0000000000..30b9aa9117 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_bfcache_page2.html @@ -0,0 +1,14 @@ +<html> +<script> +var frameUnloaded = function() { + var u = new SpeechSynthesisUtterance('hi'); + u.addEventListener('end', function () { + opener.ok(true, 'Successfully spoke utterance from new frame.'); + opener.onDone(); + }); + speechSynthesis.speak(u); +}; +</script> + +<body onpageshow="frameUnloaded()"></body></html> + diff --git a/dom/media/webspeech/synth/test/file_global_queue.html b/dom/media/webspeech/synth/test/file_global_queue.html new file mode 100644 index 0000000000..5d762c0d51 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue.html @@ -0,0 +1,69 @@ +<!DOCTYPE HTML> +<html> +<!-- 
+https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Global queue should correctly schedule utterances</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + window.todo = parent.todo; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<iframe id="frame1"></iframe> +<iframe id="frame2"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + Promise.all([loadFrame('frame1'), loadFrame('frame2')]).then(function ([frame1, frame2]) { + var win1 = frame1.contentWindow; + var win2 = frame2.contentWindow; + var utterance1 = new win1.SpeechSynthesisUtterance("hello, losers"); + var utterance2 = new win1.SpeechSynthesisUtterance("hello, losers three"); + var utterance3 = new win2.SpeechSynthesisUtterance("hello, losers too"); + var eventOrder = ['start1', 'end1', 'start3', 'end3', 'start2', 'end2']; + utterance1.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start1', 'start1'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: true }); + }); + utterance1.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end1', 'end1'); + }); + utterance3.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start3', 'start3'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: false }); + }); + utterance3.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end3', 'end3'); + }); + utterance2.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start2', 'start2'); + testSynthState(win1, 
{ speaking: true, pending: false }); + testSynthState(win2, { speaking: true, pending: false }); + }); + utterance2.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end2', 'end2'); + testSynthState(win1, { speaking: false, pending: false }); + testSynthState(win2, { speaking: false, pending: false }); + SimpleTest.finish(); + }); + win1.speechSynthesis.speak(utterance1); + win1.speechSynthesis.speak(utterance2); + win2.speechSynthesis.speak(utterance3); + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_global_queue_cancel.html b/dom/media/webspeech/synth/test/file_global_queue_cancel.html new file mode 100644 index 0000000000..03b77ba2fc --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue_cancel.html @@ -0,0 +1,88 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling cancel() should work correctly with global queue</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + window.todo = parent.todo; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<iframe id="frame1"></iframe> +<iframe id="frame2"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + Promise.all([loadFrame('frame1'), loadFrame('frame2')]).then(function ([frame1, frame2]) { + var win1 = frame1.contentWindow; + var win2 = frame2.contentWindow; + + var utterance1 = new win1.SpeechSynthesisUtterance( + "u1: Donec ac nunc feugiat, posuere"); + utterance1.lang = 'it-IT-noend'; + var utterance2 = new win1.SpeechSynthesisUtterance("u2: hello, losers too"); + 
utterance2.lang = 'it-IT-noend'; + var utterance3 = new win1.SpeechSynthesisUtterance("u3: hello, losers three"); + + var utterance4 = new win2.SpeechSynthesisUtterance("u4: hello, losers same!"); + utterance4.lang = 'it-IT-noend'; + var utterance5 = new win2.SpeechSynthesisUtterance("u5: hello, losers too"); + utterance5.lang = 'it-IT-noend'; + + var eventOrder = ['start1', 'end1', 'start2', 'end2']; + utterance1.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start1', 'start1'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: true }); + win2.speechSynthesis.cancel(); + SpecialPowers.wrap(win1.speechSynthesis).forceEnd(); + + }); + utterance1.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end1', 'end1'); + testSynthState(win1, { pending: true }); + testSynthState(win2, { pending: false }); + }); + utterance2.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start2', 'start2'); + testSynthState(win1, { speaking: true, pending: true }); + testSynthState(win2, { speaking: true, pending: false }); + win1.speechSynthesis.cancel(); + }); + utterance2.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end2', 'end2'); + testSynthState(win1, { speaking: false, pending: false }); + testSynthState(win2, { speaking: false, pending: false }); + SimpleTest.finish(); + }); + + function wrongUtterance(e) { + ok(false, 'This shall not be uttered: "' + e.target.text + '"'); + } + + utterance3.addEventListener('start', wrongUtterance); + utterance4.addEventListener('start', wrongUtterance); + utterance5.addEventListener('start', wrongUtterance); + + win1.speechSynthesis.speak(utterance1); + win1.speechSynthesis.speak(utterance2); + win1.speechSynthesis.speak(utterance3); + win2.speechSynthesis.speak(utterance4); + win2.speechSynthesis.speak(utterance5); + }); +</script> +</pre> +</body> +</html> diff --git 
a/dom/media/webspeech/synth/test/file_global_queue_pause.html b/dom/media/webspeech/synth/test/file_global_queue_pause.html new file mode 100644 index 0000000000..e345eb4c98 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_global_queue_pause.html @@ -0,0 +1,130 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling pause() should work correctly with global queue</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + window.todo = parent.todo; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<iframe id="frame1"></iframe> +<iframe id="frame2"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + Promise.all([loadFrame('frame1'), loadFrame('frame2')]).then(function ([frame1, frame2]) { + var win1 = frame1.contentWindow; + var win2 = frame2.contentWindow; + + var utterance1 = new win1.SpeechSynthesisUtterance("Speak utterance 1."); + utterance1.lang = 'it-IT-noend'; + var utterance2 = new win2.SpeechSynthesisUtterance("Speak utterance 2."); + var utterance3 = new win1.SpeechSynthesisUtterance("Speak utterance 3."); + var utterance4 = new win2.SpeechSynthesisUtterance("Speak utterance 4."); + var eventOrder = ['start1', 'pause1', 'resume1', 'end1', 'start2', 'end2', + 'start4', 'end4', 'start3', 'end3']; + + utterance1.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start1', 'start1'); + win1.speechSynthesis.pause(); + }); + utterance1.addEventListener('pause', function(e) { + var expectedEvent = eventOrder.shift() + is(expectedEvent, 'pause1', 'pause1'); + testSynthState(win1, { 
speaking: true, pending: false, paused: true}); + testSynthState(win2, { speaking: true, pending: true, paused: false}); + + if (expectedEvent == 'pause1') { + win1.speechSynthesis.resume(); + } + }); + utterance1.addEventListener('resume', function(e) { + is(eventOrder.shift(), 'resume1', 'resume1'); + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: true, paused: false}); + + win2.speechSynthesis.pause(); + + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: true, paused: true }); + + // We now make the utterance end. + SpecialPowers.wrap(win1.speechSynthesis).forceEnd(); + }); + utterance1.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end1', 'end1'); + testSynthState(win1, { speaking: false, pending: false, paused: false}); + testSynthState(win2, { speaking: false, pending: true, paused: true}); + + win2.speechSynthesis.resume(); + }); + + utterance2.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start2', 'start2'); + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: false, paused: false}); + }); + utterance2.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end2', 'end2'); + testSynthState(win1, { speaking: false, pending: false, paused: false}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + + win1.speechSynthesis.pause(); + + testSynthState(win1, { speaking: false, pending: false, paused: true}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + + win1.speechSynthesis.speak(utterance3); + win2.speechSynthesis.speak(utterance4); + + testSynthState(win1, { speaking: false, pending: true, paused: true}); + testSynthState(win2, { speaking: false, pending: true, paused: false}); + }); + + utterance4.addEventListener('start', function(e) { + 
is(eventOrder.shift(), 'start4', 'start4'); + testSynthState(win1, { speaking: true, pending: true, paused: true}); + testSynthState(win2, { speaking: true, pending: false, paused: false}); + + win1.speechSynthesis.resume(); + }); + utterance4.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end4', 'end4'); + testSynthState(win1, { speaking: false, pending: true, paused: false}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + }); + + utterance3.addEventListener('start', function(e) { + is(eventOrder.shift(), 'start3', 'start3'); + testSynthState(win1, { speaking: true, pending: false, paused: false}); + testSynthState(win2, { speaking: true, pending: false, paused: false}); + }); + + utterance3.addEventListener('end', function(e) { + is(eventOrder.shift(), 'end3', 'end3'); + testSynthState(win1, { speaking: false, pending: false, paused: false}); + testSynthState(win2, { speaking: false, pending: false, paused: false}); + + SimpleTest.finish(); + }); + + win1.speechSynthesis.speak(utterance1); + win2.speechSynthesis.speak(utterance2); + }); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_indirect_service_events.html b/dom/media/webspeech/synth/test/file_indirect_service_events.html new file mode 100644 index 0000000000..5ed7812757 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_indirect_service_events.html @@ -0,0 +1,102 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1155034 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1155034: Check that indirect audio services dispatch their own events</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" 
// Bug 1155034: indirect audio services must dispatch their own events.
// Runs two sub-tests in sequence and invokes done_cb when both complete.
function testFunc(done_cb) {
  // Sub-test 1: a voice that dispatches pause/resume/end with known service
  // arguments (charIndex 1, elapsedTime 1.5); verify those values round-trip
  // onto the DOM events.
  function test_with_events() {
    info('test_with_events');
    var utterance = new SpeechSynthesisUtterance("never end, callback events");
    utterance.lang = 'it-IT-noend';

    utterance.addEventListener('start', function(e) {
      info('start test_with_events');
      speechSynthesis.pause();
      // Wait to see if we get some bad events we didn't expect.
    });

    utterance.addEventListener('pause', function(e) {
      is(e.charIndex, 1, 'pause event charIndex matches service arguments');
      is(e.elapsedTime, 1.5, 'pause event elapsedTime matches service arguments');
      speechSynthesis.resume();
    });

    utterance.addEventListener('resume', function(e) {
      is(e.charIndex, 1, 'resume event charIndex matches service arguments');
      is(e.elapsedTime, 1.5, 'resume event elapsedTime matches service arguments');
      speechSynthesis.cancel();
    });

    utterance.addEventListener('end', function(e) {
      // Fix: this message previously said "resume event" (copy-paste error).
      is(e.charIndex, 1, 'end event charIndex matches service arguments');
      is(e.elapsedTime, 1.5, 'end event elapsedTime matches service arguments');
      test_no_events();
    });

    info('start speak');
    speechSynthesis.speak(utterance);
  }

  // Shared listener for events that must never fire during test_no_events.
  function forbiddenEvent(e) {
    ok(false, 'no "' + e.type + '" event was explicitly dispatched from the service')
  }

  // Sub-test 2: a voice flagged to suppress service events — after pausing,
  // wait a moment and assert no pause/end events arrived, then force the end.
  function test_no_events() {
    info('test_no_events');
    var utterance = new SpeechSynthesisUtterance("never end");
    utterance.lang = "it-IT-noevents-noend";
    utterance.addEventListener('start', function(e) {
      speechSynthesis.pause();
      // Wait to see if we get some bad events we didn't expect.
      setTimeout(function() {
        ok(true, 'didn\'t get any unwanted events');
        utterance.removeEventListener('end', forbiddenEvent);
        SpecialPowers.wrap(speechSynthesis).forceEnd();
        done_cb();
      }, 1000);
    });

    utterance.addEventListener('pause', forbiddenEvent);
    utterance.addEventListener('end', forbiddenEvent);

    speechSynthesis.speak(utterance);
  }

  test_with_events();
}
constructor +// and writable properties +ok(SpeechSynthesisUtterance, "SpeechSynthesisUtterance exists in global scope"); +var ssu = new SpeechSynthesisUtterance("hello world"); +is(typeof ssu, "object", "SpeechSynthesisUtterance instance is an object"); +is(ssu.text, "hello world", "SpeechSynthesisUtterance.text is correct"); +is(ssu.volume, 1, "SpeechSynthesisUtterance.volume default is correct"); +is(ssu.rate, 1, "SpeechSynthesisUtterance.rate default is correct"); +is(ssu.pitch, 1, "SpeechSynthesisUtterance.pitch default is correct"); +ssu.lang = "he-IL"; +ssu.volume = 0.5; +ssu.rate = 2.0; +ssu.pitch = 1.5; +is(ssu.lang, "he-IL", "SpeechSynthesisUtterance.lang is correct"); +is(ssu.volume, 0.5, "SpeechSynthesisUtterance.volume is correct"); +is(ssu.rate, 2.0, "SpeechSynthesisUtterance.rate is correct"); +is(ssu.pitch, 1.5, "SpeechSynthesisUtterance.pitch is correct"); + +// Assign a rate that is out of bounds +ssu.rate = 20; +is(ssu.rate, 10, "SpeechSynthesisUtterance.rate enforces max of 10"); +ssu.rate = 0; +is(ssu.rate.toPrecision(1), "0.1", "SpeechSynthesisUtterance.rate enforces min of 0.1"); + +// Assign a volume which is out of bounds +ssu.volume = 2; +is(ssu.volume, 1, "SpeechSynthesisUtterance.volume enforces max of 1"); +ssu.volume = -1; +is(ssu.volume, 0, "SpeechSynthesisUtterance.volume enforces min of 0"); + +// Assign a pitch which is out of bounds +ssu.pitch = 2.1; +is(ssu.pitch, 2, "SpeechSynthesisUtterance.pitch enforces max of 2"); +ssu.pitch = -1; +is(ssu.pitch, 0, "SpeechSynthesisUtterance.pitch enforces min of 0"); + +// Test for singleton instance hanging off of window. 
+ok(speechSynthesis, "speechSynthesis exists in global scope"); +is(typeof speechSynthesis, "object", "speechSynthesis instance is an object"); +is(typeof speechSynthesis.speak, "function", "speechSynthesis.speak is a function"); +is(typeof speechSynthesis.cancel, "function", "speechSynthesis.cancel is a function"); +is(typeof speechSynthesis.pause, "function", "speechSynthesis.pause is a function"); +is(typeof speechSynthesis.resume, "function", "speechSynthesis.resume is a function"); +is(typeof speechSynthesis.getVoices, "function", "speechSynthesis.getVoices is a function"); + +is(typeof speechSynthesis.pending, "boolean", "speechSynthesis.pending is a boolean"); +is(typeof speechSynthesis.speaking, "boolean", "speechSynthesis.speaking is a boolean"); +is(typeof speechSynthesis.paused, "boolean", "speechSynthesis.paused is a boolean"); + +var voices1 = speechSynthesis.getVoices(); +var voices2 = speechSynthesis.getVoices(); + +ok(!!voices1.length, "More than one voice found"); +ok(voices1.length == voices2.length, "Voice count matches"); + +for (var i in voices1) { + ok(voices1[i] == voices2[i], "Voice instance matches"); +} + +SimpleTest.finish(); +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_cancel.html b/dom/media/webspeech/synth/test/file_speech_cancel.html new file mode 100644 index 0000000000..2ab0e1d0a8 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_cancel.html @@ -0,0 +1,100 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1150315 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1150315: Check that successive cancel/speak calls work</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=1150315">Mozilla Bug 1150315</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1150315 **/ + +function testFunc(done_cb) { + var gotEndEvent = false; + // A long utterance that we will interrupt. + var utterance = new SpeechSynthesisUtterance("Donec ac nunc feugiat, posuere " + + "mauris id, pharetra velit. Donec fermentum orci nunc, sit amet maximus" + + "dui tincidunt ut. Sed ultricies ac nisi a laoreet. Proin interdum," + + "libero maximus hendrerit posuere, lorem risus egestas nisl, a" + + "ultricies massa justo eu nisi. Duis mattis nibh a ligula tincidunt" + + "tincidunt non eu erat. Sed bibendum varius vulputate. Cras leo magna," + + "ornare ac posuere vel, luctus id metus. Mauris nec quam ac augue" + + "consectetur bibendum. Integer a commodo tortor. Duis semper dolor eu" + + "facilisis facilisis. Etiam venenatis turpis est, quis tincidunt velit" + + "suscipit a. Cras semper orci in sapien rhoncus bibendum. Suspendisse" + + "eu ex lobortis, finibus enim in, condimentum quam. Maecenas eget dui" + + "ipsum. Aliquam tortor leo, interdum eget congue ut, tempor id elit."); + utterance.addEventListener('start', function(e) { + ok(true, 'start utterance 1'); + speechSynthesis.cancel(); + info('cancel!'); + speechSynthesis.speak(utterance2); + info('speak??'); + }); + + var utterance2 = new SpeechSynthesisUtterance("Proin ornare neque vitae " + + "risus mattis rutrum. Suspendisse a velit ut est convallis aliquet." + + "Nullam ante elit, malesuada vel luctus rutrum, ultricies nec libero." + + "Praesent eu iaculis orci. Sed nisl diam, sodales ac purus et," + + "volutpat interdum tortor. Nullam aliquam porta elit et maximus. Cras" + + "risus lectus, elementum vel sodales vel, ultricies eget lectus." + + "Curabitur velit lacus, mollis vel finibus et, molestie sit amet" + + "sapien. 
Proin vitae dolor ac augue posuere efficitur ac scelerisque" + + "diam. Nulla sed odio elit."); + utterance2.addEventListener('start', function() { + info('start'); + speechSynthesis.cancel(); + speechSynthesis.speak(utterance3); + }); + utterance2.addEventListener('end', function(e) { + gotEndEvent = true; + }); + + var utterance3 = new SpeechSynthesisUtterance("Hello, world 3!"); + utterance3.addEventListener('start', function() { + ok(gotEndEvent, "didn't get start event for this utterance"); + }); + utterance3.addEventListener('end', done_cb); + + // Speak/cancel while paused (Bug 1187105) + speechSynthesis.pause(); + speechSynthesis.speak(new SpeechSynthesisUtterance("hello.")); + ok(speechSynthesis.pending, "paused speechSynthesis has an utterance queued."); + speechSynthesis.cancel(); + ok(!speechSynthesis.pending, "paused speechSynthesis has no utterance queued."); + speechSynthesis.resume(); + + speechSynthesis.speak(utterance); + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet."); + ok(speechSynthesis.pending, "speechSynthesis has an utterance queued."); +} + +// Run test with no global queue, and then run it with a global queue. 
+testFunc(function() { + SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.force_global_queue', true]] }, function() { + testFunc(SimpleTest.finish) + }); +}); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_error.html b/dom/media/webspeech/synth/test/file_speech_error.html new file mode 100644 index 0000000000..b98ec2fac0 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_error.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1226015 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1226015</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1226015">Mozilla Bug 1226015</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1226015 **/ + +function testFunc(done_cb) { + var utterance = new SpeechSynthesisUtterance(); + utterance.lang = 'it-IT-failatstart'; + + speechSynthesis.speak(utterance); + speechSynthesis.cancel(); + + ok(true, "we didn't crash, that is good.") + SimpleTest.finish(); +} + +// Run test with no global queue, and then run it with a global queue. 
+testFunc(); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_queue.html b/dom/media/webspeech/synth/test/file_speech_queue.html new file mode 100644 index 0000000000..a471034dcf --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_queue.html @@ -0,0 +1,86 @@ +<!DOCTYPE HTML> +<html lang="en-US"> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API, check speech synth queue</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=525444">Mozilla Bug 525444</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +// XXX: Rate and pitch are not tested. 
+ +var langUriMap = {}; + +for (let voice of speechSynthesis.getVoices()) { + langUriMap[voice.lang] = voice.voiceURI; + ok(true, voice.lang + ' ' + voice.voiceURI + ' ' + voice.default); + is(voice.default, voice.lang == 'en-JM', 'Only Jamaican voice should be default'); +} + +ok(langUriMap['en-JM'], 'No English-Jamaican voice'); +ok(langUriMap['en-GB'], 'No English-British voice'); +ok(langUriMap['en-CA'], 'No English-Canadian voice'); +ok(langUriMap['fr-CA'], 'No French-Canadian voice'); +ok(langUriMap['es-MX'], 'No Spanish-Mexican voice'); +ok(langUriMap['it-IT-fail'], 'No Failing Italian voice'); + +function testFunc(done_cb) { + synthTestQueue( + [[{text: "Hello, world."}, + { uri: langUriMap['en-JM'] }], + [{text: "Bonjour tout le monde .", + args: { lang: "fr", rate: 0.5, pitch: 0.75 }}, + { uri: langUriMap['fr-CA'], rate: 0.5, pitch: 0.75}], + [{text: "How are you doing?", args: { lang: "en-GB" } }, + { rate: 1, pitch: 1, uri: langUriMap['en-GB']}], + [{text: "Come stai?", args: { lang: "it-IT-fail" } }, + { rate: 1, pitch: 1, uri: langUriMap['it-IT-fail'], err: true }], + [{text: "¡hasta mañana!", args: { lang: "es-MX" } }, + { uri: langUriMap['es-MX'] }]], + function () { + var test_data = []; + var voices = speechSynthesis.getVoices(); + for (let voice of voices) { + if (voice.lang.split("-").length > 2) { + // Skip voices that don't automatically end with success + continue; + } + test_data.push([{text: "Hello world", args: { voice} }, + {uri: voice.voiceURI}]); + } + + synthTestQueue(test_data, done_cb); + }); +} + +// Run test with no global queue, and then run it with a global queue. 
+testFunc(function() { + SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.force_global_queue', true]] }, function() { + testFunc(SimpleTest.finish) + }); +}); + + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_repeating_utterance.html b/dom/media/webspeech/synth/test/file_speech_repeating_utterance.html new file mode 100644 index 0000000000..6e37653057 --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_repeating_utterance.html @@ -0,0 +1,26 @@ +<!DOCTYPE HTML> +<html> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1305344: Utterance not repeating in Firefox</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.ok = parent.ok; + </script> + <script src="common.js"></script> +</head> +<body> + <script> + var utterance = new SpeechSynthesisUtterance("repeating?"); + var counter = 0; + utterance.addEventListener('start', function(e) { + if (counter++ === 1) { + ok(true) + SimpleTest.finish(); + } + }); + speechSynthesis.speak(utterance); + speechSynthesis.speak(utterance); + </script> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/file_speech_simple.html b/dom/media/webspeech/synth/test/file_speech_simple.html new file mode 100644 index 0000000000..c3f240ccdc --- /dev/null +++ b/dom/media/webspeech/synth/test/file_speech_simple.html @@ -0,0 +1,53 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295: Web Speech API check all classes are present</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.info = parent.info; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> + <script type="application/javascript" src="common.js"></script> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> 
+<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +var gotStartEvent = false; +var gotBoundaryEvent = false; +var utterance = new SpeechSynthesisUtterance("Hello, world!"); +utterance.addEventListener('start', function(e) { + ok(speechSynthesis.speaking, "speechSynthesis is speaking."); + ok(!speechSynthesis.pending, "speechSynthesis has no other utterances queued."); + gotStartEvent = true; +}); + +utterance.addEventListener('end', function(e) { + ok(!speechSynthesis.speaking, "speechSynthesis is not speaking."); + ok(!speechSynthesis.pending, "speechSynthesis has no other utterances queued."); + ok(gotStartEvent, "Got 'start' event."); + info('end ' + e.elapsedTime); + SimpleTest.finish(); +}); + +speechSynthesis.speak(utterance); +ok(!speechSynthesis.speaking, "speechSynthesis is not speaking yet."); +ok(speechSynthesis.pending, "speechSynthesis has an utterance queued."); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/mochitest.ini b/dom/media/webspeech/synth/test/mochitest.ini new file mode 100644 index 0000000000..2f188dac67 --- /dev/null +++ b/dom/media/webspeech/synth/test/mochitest.ini @@ -0,0 +1,29 @@ +[DEFAULT] +tags=mtg +subsuite = media +support-files = + common.js + file_bfcache_page1.html + file_bfcache_page2.html + file_setup.html + file_speech_queue.html + file_speech_simple.html + file_speech_cancel.html + file_speech_error.html + file_indirect_service_events.html + file_global_queue.html + file_global_queue_cancel.html + file_global_queue_pause.html + file_speech_repeating_utterance.html + +[test_setup.html] +[test_speech_queue.html] +[test_speech_simple.html] +[test_speech_cancel.html] +[test_speech_error.html] +[test_indirect_service_events.html] +[test_global_queue.html] +[test_global_queue_cancel.html] +[test_global_queue_pause.html] +[test_bfcache.html] +[test_speech_repeating_utterance.html] 
// Bit flags controlling how a fake voice misbehaves, so tests can exercise
// services that suppress events, never end on their own, or fail outright.
enum VoiceFlags {
  eSuppressEvents = 1,
  eSuppressEnd = 2,
  eFailAtStart = 4,
  eFail = 8
};

// Static description of one registered fake voice.
struct VoiceDetails {
  const char* uri;    // voice URI tests use to select this voice
  const char* name;
  const char* lang;   // language tag; suffixes encode behavior (e.g. "-noend")
  bool defaultVoice;  // only the en-JM entry is the default voice
  uint32_t flags;     // combination of VoiceFlags bits (0 = well-behaved)
};

static const VoiceDetails sVoices[] = {
    {"urn:moz-tts:fake:bob", "Bob Marley", "en-JM", true, 0},
    {"urn:moz-tts:fake:amy", "Amy Winehouse", "en-GB", false, 0},
    {"urn:moz-tts:fake:lenny", "Leonard Cohen", "en-CA", false, 0},
    {"urn:moz-tts:fake:celine", "Celine Dion", "fr-CA", false, 0},
    // Fix: spell out the flags member — it was previously omitted and only
    // implicitly zero-initialized, inconsistent with every sibling entry.
    {"urn:moz-tts:fake:julie", "Julieta Venegas", "es-MX", false, 0},
    {"urn:moz-tts:fake:zanetta", "Zanetta Farussi", "it-IT", false, 0},
    {"urn:moz-tts:fake:margherita", "Margherita Durastanti",
     "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
    {"urn:moz-tts:fake:teresa", "Teresa Cornelys", "it-IT-noend", false,
     eSuppressEnd},
    {"urn:moz-tts:fake:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false,
     eFailAtStart},
    {"urn:moz-tts:fake:gottardo", "Gottardo Aldighieri", "it-IT-fail", false,
     eFail},
};
mTask->DispatchStart(); + + return NS_OK; + } + + private: + nsCOMPtr<nsISpeechTask> mTask; + }; + + class DispatchEnd final : public Runnable { + public: + DispatchEnd(nsISpeechTask* aTask, const nsAString& aText) + : mozilla::Runnable("DispatchEnd"), mTask(aTask), mText(aText) {} + + NS_IMETHOD Run() override { + mTask->DispatchEnd(mText.Length() / 2, mText.Length()); + + return NS_OK; + } + + private: + nsCOMPtr<nsISpeechTask> mTask; + nsString mText; + }; + + class DispatchError final : public Runnable { + public: + DispatchError(nsISpeechTask* aTask, const nsAString& aText) + : mozilla::Runnable("DispatchError"), mTask(aTask), mText(aText) {} + + NS_IMETHOD Run() override { + mTask->DispatchError(mText.Length() / 2, mText.Length()); + + return NS_OK; + } + + private: + nsCOMPtr<nsISpeechTask> mTask; + nsString mText; + }; + + uint32_t flags = 0; + for (VoiceDetails voice : sVoices) { + if (aUri.EqualsASCII(voice.uri)) { + flags = voice.flags; + break; + } + } + + if (flags & eFailAtStart) { + return NS_ERROR_FAILURE; + } + + RefPtr<FakeSynthCallback> cb = + new FakeSynthCallback((flags & eSuppressEvents) ? 
nullptr : aTask); + + aTask->Setup(cb); + + nsCOMPtr<nsIRunnable> runnable = new DispatchStart(aTask); + NS_DispatchToMainThread(runnable); + + if (flags & eFail) { + runnable = new DispatchError(aTask, aText); + NS_DispatchToMainThread(runnable); + } else if ((flags & eSuppressEnd) == 0) { + runnable = new DispatchEnd(aTask, aText); + NS_DispatchToMainThread(runnable); + } + + return NS_OK; +} + +// nsFakeSynthService + +NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(nsFakeSynthServices) +NS_IMPL_RELEASE(nsFakeSynthServices) + +static void AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, + uint32_t aLength) { + RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance(); + for (uint32_t i = 0; i < aLength; i++) { + NS_ConvertUTF8toUTF16 name(aVoices[i].name); + NS_ConvertUTF8toUTF16 uri(aVoices[i].uri); + NS_ConvertUTF8toUTF16 lang(aVoices[i].lang); + // These services can handle more than one utterance at a time and have + // several speaking simultaneously. 
So, aQueuesUtterances == false + registry->AddVoice(aService, uri, name, lang, true, false); + if (aVoices[i].defaultVoice) { + registry->SetDefaultVoice(uri, true); + } + } + + registry->NotifyVoicesChanged(); +} + +void nsFakeSynthServices::Init() { + mSynthService = new FakeSpeechSynth(); + AddVoices(mSynthService, sVoices, ArrayLength(sVoices)); +} + +// nsIObserver + +NS_IMETHODIMP +nsFakeSynthServices::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + MOZ_ASSERT(NS_IsMainThread()); + if (NS_WARN_IF(!(!strcmp(aTopic, "speech-synth-started")))) { + return NS_ERROR_UNEXPECTED; + } + + if (Preferences::GetBool("media.webspeech.synth.test")) { + NS_DispatchToMainThread(NewRunnableMethod( + "dom::nsFakeSynthServices::Init", this, &nsFakeSynthServices::Init)); + } + + return NS_OK; +} + +// static methods + +nsFakeSynthServices* nsFakeSynthServices::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + if (!XRE_IsParentProcess()) { + MOZ_ASSERT(false, + "nsFakeSynthServices can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton) { + sSingleton = new nsFakeSynthServices(); + ClearOnShutdown(&sSingleton); + } + + return sSingleton; +} + +already_AddRefed<nsFakeSynthServices> +nsFakeSynthServices::GetInstanceForService() { + RefPtr<nsFakeSynthServices> picoService = GetInstance(); + return picoService.forget(); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.h b/dom/media/webspeech/synth/test/nsFakeSynthServices.h new file mode 100644 index 0000000000..f7e1ca7da6 --- /dev/null +++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsFakeSynthServices_h +#define nsFakeSynthServices_h + +#include "nsTArray.h" +#include "nsIObserver.h" +#include "nsISpeechService.h" +#include "nsRefPtrHashtable.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/Monitor.h" + +namespace mozilla::dom { + +class nsFakeSynthServices : public nsIObserver { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSIOBSERVER + + nsFakeSynthServices() = default; + + static nsFakeSynthServices* GetInstance(); + + static already_AddRefed<nsFakeSynthServices> GetInstanceForService(); + + private: + virtual ~nsFakeSynthServices() = default; + + void Init(); + + nsCOMPtr<nsISpeechService> mSynthService; + + static StaticRefPtr<nsFakeSynthServices> sSingleton; +}; + +} // namespace mozilla::dom + +#endif diff --git a/dom/media/webspeech/synth/test/startup/file_voiceschanged.html b/dom/media/webspeech/synth/test/startup/file_voiceschanged.html new file mode 100644 index 0000000000..6bb25462e4 --- /dev/null +++ b/dom/media/webspeech/synth/test/startup/file_voiceschanged.html @@ -0,0 +1,32 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1254378 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1254378: Web Speech API check all classes are present</title> + <script type="application/javascript"> + window.SimpleTest = parent.SimpleTest; + window.is = parent.is; + window.isnot = parent.isnot; + window.ok = parent.ok; + </script> +</head> +<body> +<script type="application/javascript"> + +/** Test for Bug 1254378 **/ + +function onVoicesChanged() { + isnot(speechSynthesis.getVoices().length, 0, "Voices added"); + speechSynthesis.removeEventListener("voiceschanged", onVoicesChanged); + SimpleTest.finish(); +} + +speechSynthesis.addEventListener("voiceschanged", onVoicesChanged); + +is(speechSynthesis.getVoices().length, 0, "No voices added initially"); +</script> +</body> +</html> 
diff --git a/dom/media/webspeech/synth/test/startup/mochitest.ini b/dom/media/webspeech/synth/test/startup/mochitest.ini new file mode 100644 index 0000000000..ec4285b772 --- /dev/null +++ b/dom/media/webspeech/synth/test/startup/mochitest.ini @@ -0,0 +1,8 @@ +[DEFAULT] +tags=mtg +subsuite = media +support-files = + file_voiceschanged.html + +[test_voiceschanged.html] +skip-if = verify diff --git a/dom/media/webspeech/synth/test/startup/test_voiceschanged.html b/dom/media/webspeech/synth/test/startup/test_voiceschanged.html new file mode 100644 index 0000000000..a60252ea7e --- /dev/null +++ b/dom/media/webspeech/synth/test/startup/test_voiceschanged.html @@ -0,0 +1,32 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1254378 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1254378: Emit onvoiceschanged when voices first added</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1254378">Mozilla Bug 1254378</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1254378 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv({ set: [['media.webspeech.synth.enabled', true]] }, + function() { document.getElementById("testFrame").src = "file_voiceschanged.html"; }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_bfcache.html b/dom/media/webspeech/synth/test/test_bfcache.html new file mode 100644 index 0000000000..ba5981a42b --- /dev/null +++ b/dom/media/webspeech/synth/test/test_bfcache.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1230533 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1230533: Test 
speech is stopped from a window when unloaded</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1230533">Mozilla Bug 1230533</a> +<p id="display"></p> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); +let testWin; + +function onDone() { + testWin.close(); + SimpleTest.finish(); +} + +SpecialPowers.pushPrefEnv({ set: [ + ['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { + testWin = window.open("about:blank", "testWin"); + testWin.onload = function(e) { + waitForVoices(testWin) + .then(() => testWin.location = "file_bfcache_page1.html") + }; + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_global_queue.html b/dom/media/webspeech/synth/test/test_global_queue.html new file mode 100644 index 0000000000..177f79b399 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Global queue should correctly schedule utterances</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + 
+/** Test for Bug 1188099 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { loadSpeechTest("file_global_queue.html"); }); + +</script> +</pre> +</body> +</html>
\ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_global_queue_cancel.html b/dom/media/webspeech/synth/test/test_global_queue_cancel.html new file mode 100644 index 0000000000..748d1367b5 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue_cancel.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling cancel() should work correctly with global queue</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { loadSpeechTest("file_global_queue_cancel.html"); }); + +</script> +</pre> +</body> +</html>
\ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_global_queue_pause.html b/dom/media/webspeech/synth/test/test_global_queue_pause.html new file mode 100644 index 0000000000..9632d85127 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_global_queue_pause.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1188099 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1188099: Calling pause() should work correctly with global queue</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1188099">Mozilla Bug 1188099</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', true]] }, + function() { loadSpeechTest("file_global_queue_pause.html"); }); + +</script> +</pre> +</body> +</html>
\ No newline at end of file diff --git a/dom/media/webspeech/synth/test/test_indirect_service_events.html b/dom/media/webspeech/synth/test/test_indirect_service_events.html new file mode 100644 index 0000000000..e5b32e70f0 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_indirect_service_events.html @@ -0,0 +1,36 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1155034 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1155034: Check that indirect audio services dispatch their own events</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1155034">Mozilla Bug 1155034</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1155034 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { loadSpeechTest("file_indirect_service_events.html"); }); + + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_setup.html b/dom/media/webspeech/synth/test/test_setup.html new file mode 100644 index 0000000000..da07687750 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_setup.html @@ -0,0 +1,32 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv({ set: [['media.webspeech.synth.enabled', true]] }, + function() { document.getElementById("testFrame").src = "file_setup.html"; }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_cancel.html b/dom/media/webspeech/synth/test/test_speech_cancel.html new file mode 100644 index 0000000000..ced952c736 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_cancel.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1150315 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1150315: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1150315">Mozilla Bug 1150315</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1150315 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { loadSpeechTest("file_speech_cancel.html"); }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_error.html b/dom/media/webspeech/synth/test/test_speech_error.html new file mode 100644 index 0000000000..e2ce156dc6 --- /dev/null +++ 
b/dom/media/webspeech/synth/test/test_speech_error.html @@ -0,0 +1,35 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=1226015 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1150315: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1226015">Mozilla Bug 1226015</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 1226015 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { loadSpeechTest("file_speech_error.html"); }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_queue.html b/dom/media/webspeech/synth/test/test_speech_queue.html new file mode 100644 index 0000000000..3bca9e0ce2 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_queue.html @@ -0,0 +1,37 @@ +<!DOCTYPE HTML> +<html lang="en-US"> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=525444 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 525444: Web Speech API, check speech synth queue</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=525444">Mozilla Bug 525444</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> 
+<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true], + ['media.webspeech.synth.force_global_queue', false]] }, + function() { + loadSpeechTest("file_speech_queue.html"); + }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_repeating_utterance.html b/dom/media/webspeech/synth/test/test_speech_repeating_utterance.html new file mode 100644 index 0000000000..6313a275c1 --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_repeating_utterance.html @@ -0,0 +1,18 @@ +<!DOCTYPE HTML> +<html> +<head> + <meta charset="utf-8"> + <title>Test for Bug 1305344: Utterance not repeating in Firefox</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script src="common.js"></script> + <link rel="stylesheet" href="/tests/SimpleTest/test.css"/> +</head> +<body> + <a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1305344">Mozilla Bug 1305344</a> + <iframe id="testFrame"></iframe> + <script> + SimpleTest.waitForExplicitFinish(); + loadSpeechTest('file_speech_repeating_utterance.html'); + </script> +</body> +</html> diff --git a/dom/media/webspeech/synth/test/test_speech_simple.html b/dom/media/webspeech/synth/test/test_speech_simple.html new file mode 100644 index 0000000000..c6c0e3a5be --- /dev/null +++ b/dom/media/webspeech/synth/test/test_speech_simple.html @@ -0,0 +1,34 @@ +<!DOCTYPE HTML> +<html> +<!-- +https://bugzilla.mozilla.org/show_bug.cgi?id=650295 +--> +<head> + <meta charset="utf-8"> + <title>Test for Bug 650295: Web Speech API check all classes are present</title> + <script src="/tests/SimpleTest/SimpleTest.js"></script> + <script type="application/javascript" src="common.js"></script> + <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/> +</head> +<body> +<a target="_blank" 
href="https://bugzilla.mozilla.org/show_bug.cgi?id=650295">Mozilla Bug 650295</a> +<p id="display"></p> +<iframe id="testFrame"></iframe> +<div id="content" style="display: none"> + +</div> +<pre id="test"> +<script type="application/javascript"> + +/** Test for Bug 525444 **/ + +SimpleTest.waitForExplicitFinish(); + +SpecialPowers.pushPrefEnv( + { set: [['media.webspeech.synth.enabled', true]] }, + function() { loadSpeechTest("file_speech_simple.html"); }); + +</script> +</pre> +</body> +</html> diff --git a/dom/media/webspeech/synth/windows/SapiService.cpp b/dom/media/webspeech/synth/windows/SapiService.cpp new file mode 100644 index 0000000000..f1e44213d1 --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.cpp @@ -0,0 +1,445 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.h" +#include "SapiService.h" +#include "nsServiceManagerUtils.h" +#include "nsEscape.h" +#include "nsXULAppAPI.h" + +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/dom/nsSynthVoiceRegistry.h" +#include "mozilla/dom/nsSpeechTask.h" +#include "mozilla/Preferences.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/StaticPrefs_media.h" + +namespace mozilla::dom { + +constexpr static WCHAR kSpCategoryOneCoreVoices[] = + L"HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices"; + +StaticRefPtr<SapiService> SapiService::sSingleton; + +class SapiCallback final : public nsISpeechTaskCallback { + public: + SapiCallback(nsISpeechTask* aTask, ISpVoice* aSapiClient, + uint32_t aTextOffset, uint32_t aSpeakTextLen) + : mTask(aTask), + mSapiClient(aSapiClient), + mTextOffset(aTextOffset), + mSpeakTextLen(aSpeakTextLen), + mCurrentIndex(0), + mStreamNum(0) { + mStartingTime = TimeStamp::Now(); + } + + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SapiCallback, nsISpeechTaskCallback) + + NS_DECL_NSISPEECHTASKCALLBACK + + ULONG GetStreamNum() const { return mStreamNum; } + void SetStreamNum(ULONG aValue) { mStreamNum = aValue; } + + void OnSpeechEvent(const SPEVENT& speechEvent); + + private: + ~SapiCallback() {} + + float GetTimeDurationFromStart() const { + TimeDuration duration = TimeStamp::Now() - mStartingTime; + return duration.ToSeconds(); + } + + // This pointer is used to dispatch events + nsCOMPtr<nsISpeechTask> mTask; + RefPtr<ISpVoice> mSapiClient; + + uint32_t mTextOffset; + uint32_t mSpeakTextLen; + + // Used for calculating the time taken to speak the utterance + TimeStamp mStartingTime; + uint32_t mCurrentIndex; + + ULONG mStreamNum; +}; + +NS_IMPL_CYCLE_COLLECTION(SapiCallback, mTask); + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SapiCallback) + NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) +NS_INTERFACE_MAP_END + 
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SapiCallback) +NS_IMPL_CYCLE_COLLECTING_RELEASE(SapiCallback) + +NS_IMETHODIMP +SapiCallback::OnPause() { + if (FAILED(mSapiClient->Pause())) { + return NS_ERROR_FAILURE; + } + if (!mTask) { + // When calling pause() on child process, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnResume() { + if (FAILED(mSapiClient->Resume())) { + return NS_ERROR_FAILURE; + } + if (!mTask) { + // When calling resume() on child process, it may not receive end event + // from chrome process yet. + return NS_ERROR_FAILURE; + } + mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex); + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnCancel() { + // After cancel, mCurrentIndex may be updated. + // At cancel case, use mCurrentIndex for DispatchEnd. + mSpeakTextLen = 0; + // Purge all the previous utterances and speak an empty string + if (FAILED(mSapiClient->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr))) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +NS_IMETHODIMP +SapiCallback::OnVolumeChanged(float aVolume) { + mSapiClient->SetVolume(static_cast<USHORT>(aVolume * 100)); + return NS_OK; +} + +void SapiCallback::OnSpeechEvent(const SPEVENT& speechEvent) { + switch (speechEvent.eEventId) { + case SPEI_START_INPUT_STREAM: + mTask->DispatchStart(); + break; + case SPEI_END_INPUT_STREAM: + if (mSpeakTextLen) { + mCurrentIndex = mSpeakTextLen; + } + mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex); + mTask = nullptr; + break; + case SPEI_TTS_BOOKMARK: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(u"mark"_ns, GetTimeDurationFromStart(), + mCurrentIndex, 0, 0); + break; + case SPEI_WORD_BOUNDARY: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(u"word"_ns, 
GetTimeDurationFromStart(), + mCurrentIndex, + static_cast<ULONG>(speechEvent.wParam), 1); + break; + case SPEI_SENTENCE_BOUNDARY: + mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset; + mTask->DispatchBoundary(u"sentence"_ns, GetTimeDurationFromStart(), + mCurrentIndex, + static_cast<ULONG>(speechEvent.wParam), 1); + break; + default: + break; + } +} + +// static +void __stdcall SapiService::SpeechEventCallback(WPARAM aWParam, + LPARAM aLParam) { + RefPtr<ISpVoice> spVoice = (ISpVoice*)aWParam; + RefPtr<SapiService> service = (SapiService*)aLParam; + + SPEVENT speechEvent; + while (spVoice->GetEvents(1, &speechEvent, nullptr) == S_OK) { + for (size_t i = 0; i < service->mCallbacks.Length(); i++) { + RefPtr<SapiCallback> callback = service->mCallbacks[i]; + if (callback->GetStreamNum() == speechEvent.ulStreamNum) { + callback->OnSpeechEvent(speechEvent); + if (speechEvent.eEventId == SPEI_END_INPUT_STREAM) { + service->mCallbacks.RemoveElementAt(i); + } + break; + } + } + } +} + +NS_INTERFACE_MAP_BEGIN(SapiService) + NS_INTERFACE_MAP_ENTRY(nsISpeechService) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService) +NS_INTERFACE_MAP_END + +NS_IMPL_ADDREF(SapiService) +NS_IMPL_RELEASE(SapiService) + +SapiService::SapiService() : mInitialized(false) {} + +SapiService::~SapiService() {} + +bool SapiService::Init() { + AUTO_PROFILER_LABEL("SapiService::Init", OTHER); + + MOZ_ASSERT(!mInitialized); + + if (Preferences::GetBool("media.webspeech.synth.test") || + !StaticPrefs::media_webspeech_synth_enabled()) { + // When enabled, we shouldn't add OS backend (Bug 1160844) + return false; + } + + // Get all the voices from sapi and register in the SynthVoiceRegistry + if (!RegisterVoices()) { + return false; + } + + mInitialized = true; + return true; +} + +already_AddRefed<ISpVoice> SapiService::InitSapiInstance() { + RefPtr<ISpVoice> spVoice; + if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, 
IID_ISpVoice, + getter_AddRefs(spVoice)))) { + return nullptr; + } + + // Set interest for all the events we are interested in + ULONGLONG eventMask = SPFEI(SPEI_START_INPUT_STREAM) | + SPFEI(SPEI_TTS_BOOKMARK) | SPFEI(SPEI_WORD_BOUNDARY) | + SPFEI(SPEI_SENTENCE_BOUNDARY) | + SPFEI(SPEI_END_INPUT_STREAM); + + if (FAILED(spVoice->SetInterest(eventMask, eventMask))) { + return nullptr; + } + + // Set the callback function for receiving the events + spVoice->SetNotifyCallbackFunction( + (SPNOTIFYCALLBACK*)SapiService::SpeechEventCallback, + (WPARAM)spVoice.get(), (LPARAM)this); + + return spVoice.forget(); +} + +bool SapiService::RegisterVoices() { + nsCOMPtr<nsISynthVoiceRegistry> registry = + do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID); + if (!registry) { + return false; + } + bool result = RegisterVoices(registry, kSpCategoryOneCoreVoices); + result |= RegisterVoices(registry, SPCAT_VOICES); + if (result) { + registry->NotifyVoicesChanged(); + } + return result; +} + +bool SapiService::RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry, + const WCHAR* categoryId) { + nsresult rv; + + RefPtr<ISpObjectTokenCategory> category; + if (FAILED(CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, + IID_ISpObjectTokenCategory, + getter_AddRefs(category)))) { + return false; + } + if (FAILED(category->SetId(categoryId, FALSE))) { + return false; + } + + RefPtr<IEnumSpObjectTokens> voiceTokens; + if (FAILED(category->EnumTokens(nullptr, nullptr, + getter_AddRefs(voiceTokens)))) { + return false; + } + + WCHAR locale[LOCALE_NAME_MAX_LENGTH]; + while (true) { + RefPtr<ISpObjectToken> voiceToken; + if (voiceTokens->Next(1, getter_AddRefs(voiceToken), nullptr) != S_OK) { + break; + } + + RefPtr<ISpDataKey> attributes; + if (FAILED( + voiceToken->OpenKey(L"Attributes", getter_AddRefs(attributes)))) { + continue; + } + + WCHAR* language = nullptr; + if (FAILED(attributes->GetStringValue(L"Language", &language))) { + continue; + } + + // Language attribute is 
LCID by hex. So we need convert to locale + // name. + nsAutoString hexLcid; + LCID lcid = wcstol(language, nullptr, 16); + CoTaskMemFree(language); + if (NS_WARN_IF( + !LCIDToLocaleName(lcid, locale, LOCALE_NAME_MAX_LENGTH, 0))) { + continue; + } + + WCHAR* description = nullptr; + if (FAILED(voiceToken->GetStringValue(nullptr, &description))) { + continue; + } + + nsAutoString uri; + uri.AssignLiteral("urn:moz-tts:sapi:"); + uri.Append(description); + uri.AppendLiteral("?"); + uri.Append(locale); + + // This service can only speak one utterance at a time, se we set + // aQueuesUtterances to true in order to track global state and schedule + // access to this service. + rv = registry->AddVoice(this, uri, nsDependentString(description), + nsDependentString(locale), true, true); + CoTaskMemFree(description); + if (NS_FAILED(rv)) { + continue; + } + + mVoices.InsertOrUpdate(uri, std::move(voiceToken)); + } + + return true; +} + +NS_IMETHODIMP +SapiService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume, + float aRate, float aPitch, nsISpeechTask* aTask) { + NS_ENSURE_TRUE(mInitialized, NS_ERROR_NOT_AVAILABLE); + + RefPtr<ISpObjectToken> voiceToken; + if (!mVoices.Get(aUri, getter_AddRefs(voiceToken))) { + return NS_ERROR_NOT_AVAILABLE; + } + + RefPtr<ISpVoice> spVoice = InitSapiInstance(); + if (!spVoice) { + return NS_ERROR_FAILURE; + } + + if (FAILED(spVoice->SetVoice(voiceToken))) { + return NS_ERROR_FAILURE; + } + + if (FAILED(spVoice->SetVolume(static_cast<USHORT>(aVolume * 100)))) { + return NS_ERROR_FAILURE; + } + + // The max supported rate in SAPI engines is 3x, and the min is 1/3x. It is + // expressed by an integer. 0 being normal rate, -10 is 1/3 and 10 is 3x. + // Values below and above that are allowed, but the engine may clip the rate + // to its maximum capable value. 
+ // "Each increment between -10 and +10 is logarithmically distributed such + // that incrementing or decrementing by 1 is multiplying or dividing the + // rate by the 10th root of 3" + // https://msdn.microsoft.com/en-us/library/ee431826(v=vs.85).aspx + long rate = aRate != 0 ? static_cast<long>(10 * log10(aRate) / log10(3)) : 0; + if (FAILED(spVoice->SetRate(rate))) { + return NS_ERROR_FAILURE; + } + + // Set the pitch using xml + nsAutoString xml; + xml.AssignLiteral("<pitch absmiddle=\""); + // absmiddle doesn't allow float type + xml.AppendInt(static_cast<int32_t>(aPitch * 10.0f - 10.0f)); + xml.AppendLiteral("\">"); + uint32_t textOffset = xml.Length(); + + for (size_t i = 0; i < aText.Length(); i++) { + switch (aText[i]) { + case '&': + xml.AppendLiteral("&"); + break; + case '<': + xml.AppendLiteral("<"); + break; + case '>': + xml.AppendLiteral(">"); + break; + default: + xml.Append(aText[i]); + break; + } + } + + xml.AppendLiteral("</pitch>"); + + RefPtr<SapiCallback> callback = + new SapiCallback(aTask, spVoice, textOffset, aText.Length()); + + // The last three parameters doesn't matter for an indirect service + nsresult rv = aTask->Setup(callback); + if (NS_FAILED(rv)) { + return rv; + } + + ULONG streamNum; + if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) { + aTask->Setup(nullptr); + return NS_ERROR_FAILURE; + } + + callback->SetStreamNum(streamNum); + // streamNum reassigns same value when last stream is finished even if + // callback for stream end isn't called + // So we cannot use data hashtable and has to add it to vector at last. 
+ mCallbacks.AppendElement(callback); + + return NS_OK; +} + +NS_IMETHODIMP +SapiService::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + return NS_OK; +} + +SapiService* SapiService::GetInstance() { + MOZ_ASSERT(NS_IsMainThread()); + if (XRE_GetProcessType() != GeckoProcessType_Default) { + MOZ_ASSERT(false, "SapiService can only be started on main gecko process"); + return nullptr; + } + + if (!sSingleton) { + RefPtr<SapiService> service = new SapiService(); + if (service->Init()) { + sSingleton = service; + ClearOnShutdown(&sSingleton); + } + } + return sSingleton; +} + +already_AddRefed<SapiService> SapiService::GetInstanceForService() { + RefPtr<SapiService> sapiService = GetInstance(); + return sapiService.forget(); +} + +} // namespace mozilla::dom diff --git a/dom/media/webspeech/synth/windows/SapiService.h b/dom/media/webspeech/synth/windows/SapiService.h new file mode 100644 index 0000000000..79cc20917b --- /dev/null +++ b/dom/media/webspeech/synth/windows/SapiService.h @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
 */

#ifndef mozilla_dom_SapiService_h
#define mozilla_dom_SapiService_h

#include "nsISpeechService.h"
#include "nsIObserver.h"
#include "nsRefPtrHashtable.h"
#include "nsTArray.h"
#include "mozilla/StaticPtr.h"

#include <windows.h>
#include <sapi.h>

class nsISynthVoiceRegistry;

namespace mozilla::dom {

class SapiCallback;

// nsISpeechService implementation backed by the Windows Speech API (SAPI).
// A process-wide singleton; GetInstance() asserts it is only created on the
// main thread of the parent gecko process.
class SapiService final : public nsISpeechService, public nsIObserver {
 public:
  NS_DECL_ISUPPORTS
  NS_DECL_NSISPEECHSERVICE
  NS_DECL_NSIOBSERVER

  SapiService();
  // One-time initialization; Speak() fails with NS_ERROR_NOT_AVAILABLE
  // until this has succeeded (it sets mInitialized — see SapiService.cpp).
  bool Init();

  // Returns the singleton, created lazily and cleared on shutdown.
  static SapiService* GetInstance();
  // XPCOM factory wrapper around GetInstance() (used by components.conf).
  static already_AddRefed<SapiService> GetInstanceForService();

  // SAPI notification callback registered on each ISpVoice instance;
  // receives the speech progress events the service subscribed to.
  static void __stdcall SpeechEventCallback(WPARAM aWParam, LPARAM aLParam);

 private:
  virtual ~SapiService();

  // Creates and configures a fresh ISpVoice wired to SpeechEventCallback.
  already_AddRefed<ISpVoice> InitSapiInstance();
  // Registers all SAPI voices (both categories) with the voice registry.
  bool RegisterVoices();
  bool RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry,
                      const WCHAR* categoryId);

  // Maps voice URI ("urn:moz-tts:sapi:...") to its SAPI voice token;
  // populated by RegisterVoices().
  nsRefPtrHashtable<nsStringHashKey, ISpObjectToken> mVoices;
  // Callbacks for in-flight utterances, appended by Speak(). A vector (not a
  // hashtable) because SAPI reuses stream numbers across utterances.
  nsTArray<RefPtr<SapiCallback>> mCallbacks;

  bool mInitialized;

  static StaticRefPtr<SapiService> sSingleton;
};

} // namespace mozilla::dom

#endif
# ---- components.conf ----

Classes = [
    {
        # Singleton XPCOM registration for the Windows SAPI speech synthesis
        # service; constructed through GetInstanceForService().
        'cid': '{21b4a45b-9806-4021-a706-d768ab0548f9}',
        'contract_ids': ['@mozilla.org/synthsapi;1'],
        'singleton': True,
        'type': 'mozilla::dom::SapiService',
        'headers': ['/dom/media/webspeech/synth/windows/SapiService.h'],
        'constructor': 'mozilla::dom::SapiService::GetInstanceForService',
        # Instantiated when the speech-synth subsystem starts up.
        'categories': {"speech-synth-started": 'Sapi Speech Synth'},
    },
]

# ---- moz.build ----

UNIFIED_SOURCES += [
    "SapiService.cpp",
]

XPCOM_MANIFESTS += [
    "components.conf",
]

include("/ipc/chromium/chromium-config.mozbuild")

FINAL_LIBRARY = "xul"