summaryrefslogtreecommitdiffstats
path: root/dom/media/webspeech/recognition/OnlineSpeechRecognitionService.h
blob: c049e5046a432de9c114e0ed72a540e90262f762 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_OnlineRecognitionService_h
#define mozilla_dom_OnlineRecognitionService_h

#include "nsCOMPtr.h"
#include "nsTArray.h"
#include "nsISpeechRecognitionService.h"
#include "speex/speex_resampler.h"
#include "nsIStreamListener.h"
#include "OpusTrackEncoder.h"
#include "ContainerWriter.h"

// Class ID (CID) initializer for the online speech recognition service:
// {0ff5ce56-5b09-4db8-adc6-8266af95f864}.
//
// The expansion is a bare brace initializer and deliberately carries no
// trailing semicolon, so it can be used as a complete initializer
// (`= NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID;`) as well as inside a larger
// expression such as an aggregate initializer list or a macro argument.
#define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \
  {0x0ff5ce56,                                   \
   0x5b09,                                       \
   0x4db8,                                       \
   {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}}

namespace mozilla {

// Forward declaration of mozilla::ipc::PrincipalInfo, so the type can be named
// by pointer or reference without including its defining header.
// NOTE(review): no declaration written out in this header names it directly;
// presumably it is needed by code the NS_DECL_* macros expand to, or kept for
// includers - confirm before removing.
namespace ipc {
class PrincipalInfo;
}  // namespace ipc

/**
 * Online implementation of the nsISpeechRecognitionService interface.
 *
 * The class also implements nsIStreamListener. Per the member comments below,
 * audio is encoded to opus, wrapped in an ogg container, and the JSON reply
 * from the endpoint is stored in mBuf.
 */
class OnlineSpeechRecognitionService : public nsISpeechRecognitionService,
                                       public nsIStreamListener {
 public:
  // XPCOM glue: these macros expand to declarations supplied by the XPCOM and
  // interface headers (the expansions are not visible in this file).
  NS_DECL_THREADSAFE_ISUPPORTS
  NS_DECL_NSISPEECHRECOGNITIONSERVICE
  NS_DECL_NSIREQUESTOBSERVER
  NS_DECL_NSISTREAMLISTENER

  /**
   * Listener responsible for handling the events raised by the TrackEncoder.
   *
   * Each handled event is forwarded to the owning
   * OnlineSpeechRecognitionService. The listener records
   * AbstractThread::GetCurrent() at construction and asserts that the
   * forwarded callbacks run on that same thread.
   *
   * NOTE(review): the listener holds a strong reference to the service
   * (mService) while the service holds a strong reference to its listener
   * (mSpeechEncoderListener); presumably one of the two is released
   * explicitly in the implementation file - confirm.
   */
  class SpeechEncoderListener : public TrackEncoderListener {
   public:
    /**
     * @param aService service that receives the forwarded encoder events. It
     *                 is dereferenced without a null check by the callbacks
     *                 below.
     */
    explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService)
        : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {}

    // Intentionally a no-op: nothing is forwarded for this event.
    void Started(TrackEncoder* aEncoder) override {}

    // Forwards the event to OnlineSpeechRecognitionService::EncoderInitialized.
    void Initialized(TrackEncoder* aEncoder) override {
      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
      mService->EncoderInitialized();
    }

    // Forwards the event to OnlineSpeechRecognitionService::EncoderError.
    void Error(TrackEncoder* aEncoder) override {
      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
      mService->EncoderError();
    }

   private:
    // Service notified by this listener; kept alive by this strong reference.
    const RefPtr<OnlineSpeechRecognitionService> mService;
    // Thread that was current when the listener was constructed; used only in
    // the assertions above.
    const RefPtr<AbstractThread> mOwningThread;
  };

  /**
   * Default-constructs an OnlineSpeechRecognitionService.
   */
  OnlineSpeechRecognitionService();

  /**
   * Called by SpeechEncoderListener when the AudioTrackEncoder has been
   * initialized.
   */
  void EncoderInitialized();

  /**
   * Called after the AudioTrackEncoder has encoded all data for us to wrap in a
   * container and pass along.
   */
  void EncoderFinished();

  /**
   * Called by SpeechEncoderListener when the AudioTrackEncoder has
   * encountered an error.
   */
  void EncoderError();

 private:
  /**
   * Private destructor to prevent bypassing of reference counting.
   */
  virtual ~OnlineSpeechRecognitionService();

  /** The associated SpeechRecognition. */
  nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition;

  /**
   * Builds a mock SpeechRecognitionResultList.
   *
   * NOTE(review): returns a raw pointer; who owns the returned list is not
   * visible from this header - confirm against the implementation.
   */
  dom::SpeechRecognitionResultList* BuildMockResultList();

  /**
   * Method responsible for uploading the audio to the remote endpoint.
   */
  void DoSTT();

  // Encoded and packaged ogg audio data.
  nsTArray<nsTArray<uint8_t>> mEncodedData;
  // Member responsible for holding a reference to the TrackEncoderListener.
  RefPtr<SpeechEncoderListener> mSpeechEncoderListener;
  // MediaQueue fed encoded data by mAudioEncoder.
  MediaQueue<EncodedFrame> mEncodedAudioQueue;
  // Encoder responsible for encoding the frames from pcm to opus, which is the
  // format supported by our backend.
  UniquePtr<AudioTrackEncoder> mAudioEncoder;
  // Object responsible for wrapping the opus frames into an ogg container.
  UniquePtr<ContainerWriter> mWriter;
  // Member responsible for storing the json string returned by the endpoint.
  nsCString mBuf;
  // Used to calculate a ceiling on the time spent listening.
  TimeStamp mFirstIteration;
  // Flag recording whether the user chose to abort.
  bool mAborted = false;
  // Reference to the audio encoder queue.
  RefPtr<TaskQueue> mEncodeTaskQueue;
};

}  // namespace mozilla

#endif