Adding upstream version 124.0.1. (tag: upstream/124.0.1)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 132 insertions, 0 deletions
diff --git a/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h b/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h
new file mode 100644
index 0000000000..c049e5046a
--- /dev/null
+++ b/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h
@@ -0,0 +1,132 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_OnlineRecognitionService_h
+#define mozilla_dom_OnlineRecognitionService_h
+
+#include "nsCOMPtr.h"
+#include "nsTArray.h"
+#include "nsISpeechRecognitionService.h"
+#include "speex/speex_resampler.h"
+#include "nsIStreamListener.h"
+#include "OpusTrackEncoder.h"
+#include "ContainerWriter.h"
+
+#define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \
+  {0x0ff5ce56,                                   \
+   0x5b09,                                       \
+   0x4db8,                                       \
+   {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}}  // CID; no trailing ';'
+
+namespace mozilla {
+
+namespace ipc {
+class PrincipalInfo;
+}  // namespace ipc
+
+/**
+ * Online implementation of the nsISpeechRecognitionService interface. The
+ * class additionally derives from nsIStreamListener. */
+class OnlineSpeechRecognitionService : public nsISpeechRecognitionService,
+                                       public nsIStreamListener {
+ public:
+  // XPCOM glue code, supplied by the NS_DECL_* macros below
+  NS_DECL_THREADSAFE_ISUPPORTS
+  NS_DECL_NSISPEECHRECOGNITIONSERVICE
+  NS_DECL_NSIREQUESTOBSERVER
+  NS_DECL_NSISTREAMLISTENER
+
+  /**
+   * Listener forwarding TrackEncoder events to the service. Initialized() and
+   * Error() assert that they run on the thread captured at construction. */
+  class SpeechEncoderListener : public TrackEncoderListener {
+   public:
+    explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService)
+        : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {}
+
+    void Started(TrackEncoder* aEncoder) override {}  // no-op
+
+    void Initialized(TrackEncoder* aEncoder) override {
+      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
+      mService->EncoderInitialized();
+    }
+
+    void Error(TrackEncoder* aEncoder) override {
+      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
+      mService->EncoderError();
+    }
+
+   private:
+    const RefPtr<OnlineSpeechRecognitionService> mService;  // service to notify
+    const RefPtr<AbstractThread> mOwningThread;  // GetCurrent() at creation
+  };
+
+  /**
+   * Default-constructs an OnlineSpeechRecognitionService.
+   */
+  OnlineSpeechRecognitionService();
+
+  /**
+   * Called by SpeechEncoderListener::Initialized() when the AudioTrackEncoder
+   * has been initialized.
+   */
+  void EncoderInitialized();
+
+  /**
+   * Called after the AudioTrackEncoder has encoded all data for us to wrap in a
+   * container and pass along.
+   */
+  void EncoderFinished();
+
+  /**
+   * Called by SpeechEncoderListener::Error() when the AudioTrackEncoder has
+   * encountered an error.
+   */
+  void EncoderError();
+
+ private:
+  /**
+   * Private destructor to prevent bypassing of reference counting
+   */
+  virtual ~OnlineSpeechRecognitionService();
+
+  /** The associated SpeechRecognition (main-thread pointer handle) */
+  nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition;
+
+  /**
+   * Builds a mock SpeechRecognitionResultList. NOTE(review): returns a raw
+   * pointer; ownership is not stated in this header -- TODO confirm. */
+  dom::SpeechRecognitionResultList* BuildMockResultList();
+
+  /**
+   * Method responsible for uploading the audio to the remote endpoint
+   */
+  void DoSTT();
+
+  // Encoded and packaged Ogg audio data
+  nsTArray<nsTArray<uint8_t>> mEncodedData;
+  // TrackEncoderListener we hold; note it holds a RefPtr back to this service
+  RefPtr<SpeechEncoderListener> mSpeechEncoderListener;
+  // MediaQueue fed encoded data by mAudioEncoder
+  MediaQueue<EncodedFrame> mEncodedAudioQueue;
+  // Encoder responsible for encoding the frames from PCM to Opus, which is the
+  // format supported by our backend
+  UniquePtr<AudioTrackEncoder> mAudioEncoder;
+  // Object responsible for wrapping the Opus frames into an Ogg container
+  UniquePtr<ContainerWriter> mWriter;
+  // Stores the JSON string returned by the endpoint
+  nsCString mBuf;
+  // Used to calculate a ceiling on the time spent listening.
+  TimeStamp mFirstIteration;
+  // Flag tracking whether the user chose to abort; initialized to false
+  bool mAborted = false;
+  // Reference to the audio encoder task queue
+  RefPtr<TaskQueue> mEncodeTaskQueue;
+};
+
+}  // namespace mozilla
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip