summaryrefslogtreecommitdiffstats
path: root/widget/TextRecognition.cpp
blob: 3d4a053bec8aac61d12bfbc58cab94f2090e4f28 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "TextRecognition.h"
#include "mozilla/dom/Promise.h"
#include "mozilla/dom/ShadowRoot.h"
#include "mozilla/dom/Document.h"
#include "mozilla/dom/ContentChild.h"
#include "nsTextNode.h"
#include "imgIContainer.h"

#ifdef XP_MACOSX
#  include "nsCocoaFeatures.h"
#endif

using namespace mozilla::dom;

namespace mozilla::widget {

/**
 * Image-based entry point for text recognition.
 *
 * Fetches the current frame of aImage, obtains a data surface for it and
 * forwards to the gfx::DataSourceSurface overload of FindText.
 *
 * @param aImage     The image whose current frame is inspected.
 * @param aLanguages Passed through unchanged to the surface overload.
 * @return The promise produced by the surface overload, or a promise that is
 *         already rejected with a short message when either the frame or its
 *         data surface cannot be obtained.
 */
auto TextRecognition::FindText(imgIContainer& aImage,
                               const nsTArray<nsCString>& aLanguages)
    -> RefPtr<NativePromise> {
  // TODO: Maybe decode async.
  const uint32_t kFrameFlags =
      imgIContainer::FLAG_SYNC_DECODE | imgIContainer::FLAG_ASYNC_NOTIFY;

  RefPtr<gfx::SourceSurface> surface =
      aImage.GetFrame(imgIContainer::FRAME_CURRENT, kFrameFlags);
  if (NS_WARN_IF(!surface)) {
    // No frame was returned for the image.
    return NativePromise::CreateAndReject("Failed to get surface"_ns, __func__);
  }

  RefPtr<gfx::DataSourceSurface> dataSurface = surface->GetDataSurface();
  if (NS_WARN_IF(!dataSurface)) {
    // A frame exists but no data surface could be produced from it.
    return NativePromise::CreateAndReject("Failed to get data surface"_ns,
                                          __func__);
  }

  return FindText(*dataSurface, aLanguages);
}

/**
 * Surface-based entry point for text recognition.
 *
 * When not running in a content process this simply calls DoFindText. In a
 * content process the surface is converted with
 * nsContentUtils::SurfaceToIPCImage and sent through
 * ContentChild::SendFindImageText; the reply is then relayed into the promise
 * returned from here.
 *
 * @param aSurface   Pixels to run recognition on.
 * @param aLanguages Forwarded unchanged to DoFindText / SendFindImageText.
 * @return A promise resolved with the TextRecognitionResult, or rejected with
 *         an error string (conversion failure, an error string carried in the
 *         reply, an unexpected reply type, or an IPC rejection).
 */
auto TextRecognition::FindText(gfx::DataSourceSurface& aSurface,
                               const nsTArray<nsCString>& aLanguages)
    -> RefPtr<NativePromise> {
  // Not a content process: no IPC hop, call the implementation directly.
  if (!XRE_IsContentProcess()) {
    return DoFindText(aSurface, aLanguages);
  }

  auto ipcImage = nsContentUtils::SurfaceToIPCImage(aSurface);
  if (!ipcImage) {
    return NativePromise::CreateAndReject("Failed to share data surface"_ns,
                                          __func__);
  }

  // Private promise that the IPC callbacks below settle.
  auto relayed = MakeRefPtr<NativePromise::Private>(__func__);

  // The reply is a union: either a result or an error string.
  auto onReply = [relayed](TextRecognitionResultOrError&& aReply) {
    using ReplyType = TextRecognitionResultOrError::Type;
    const ReplyType replyType = aReply.type();
    if (replyType == ReplyType::TTextRecognitionResult) {
      relayed->Resolve(std::move(aReply.get_TextRecognitionResult()), __func__);
      return;
    }
    if (replyType == ReplyType::TnsCString) {
      relayed->Reject(std::move(aReply.get_nsCString()), __func__);
      return;
    }
    // Neither known variant: assert in debug builds, but still settle the
    // promise.
    MOZ_ASSERT_UNREACHABLE("Unknown result?");
    relayed->Reject("Unknown error"_ns, __func__);
  };

  // The IPC request itself was rejected; the reason is not inspected.
  auto onIpcFailure = [relayed](mozilla::ipc::ResponseRejectReason) {
    relayed->Reject("IPC rejection"_ns, __func__);
  };

  ContentChild::GetSingleton()
      ->SendFindImageText(std::move(*ipcImage), aLanguages)
      ->Then(GetCurrentSerialEventTarget(), __func__, std::move(onReply),
             std::move(onIpcFailure));
  return relayed;
}

/**
 * Populates aShadow with a DOM representation of aResult.
 *
 * A single <div> is appended to aShadow. It holds one <span> per quad in
 * aResult.quads(); each span carries:
 *   - data-points:     the quad's point coordinates as "x,y,x,y,..."
 *   - data-confidence: the quad's confidence value
 *   - a text node child holding the quad's string.
 *
 * Errors from SetAttribute / AppendChildTo are ignored.
 *
 * @param aShadow The shadow root that receives the generated subtree.
 * @param aResult The recognition result to render.
 */
void TextRecognition::FillShadow(ShadowRoot& aShadow,
                                 const TextRecognitionResult& aResult) {
  Document& ownerDoc = *aShadow.OwnerDoc();
  RefPtr<Element> container = ownerDoc.CreateHTMLElement(nsGkAtoms::div);

  for (const auto& quad : aResult.quads()) {
    RefPtr<Element> quadSpan = ownerDoc.CreateHTMLElement(nsGkAtoms::span);

    // TODO: We probably want to position them here and so on. For now, expose
    // the data as attributes so that it's easy to play with the returned values
    // in JS.
    nsAutoString coordinates;
    bool needsSeparator = false;
    for (const auto& corner : quad.points()) {
      // Separate consecutive points with a comma; none before the first or
      // after the last.
      if (needsSeparator) {
        coordinates.Append(u',');
      }
      coordinates.AppendFloat(corner.x);
      coordinates.Append(u',');
      coordinates.AppendFloat(corner.y);
      needsSeparator = true;
    }
    quadSpan->SetAttribute(u"data-points"_ns, coordinates, IgnoreErrors());

    nsAutoString confidenceText;
    confidenceText.AppendFloat(quad.confidence());
    quadSpan->SetAttribute(u"data-confidence"_ns, confidenceText,
                           IgnoreErrors());

    // The recognized string becomes the span's text content.
    RefPtr<nsTextNode> label = ownerDoc.CreateTextNode(quad.string());
    quadSpan->AppendChildTo(label, true, IgnoreErrors());

    container->AppendChildTo(quadSpan, true, IgnoreErrors());
  }

  aShadow.AppendChildTo(container, true, IgnoreErrors());
}

#ifndef XP_MACOSX
/**
 * Fallback used when XP_MACOSX is not defined.
 *
 * Both arguments are ignored. Release-asserts that the caller is the parent
 * process and then returns a promise that is already rejected with
 * "Text recognition not available".
 */
auto TextRecognition::DoFindText(gfx::DataSourceSurface& /* aSurface */,
                                 const nsTArray<nsCString>& /* aLanguages */)
    -> RefPtr<NativePromise> {
  MOZ_RELEASE_ASSERT(XRE_IsParentProcess(),
                     "This should only run in the parent process");
  RefPtr<NativePromise> unavailable = NativePromise::CreateAndReject(
      "Text recognition not available"_ns, __func__);
  return unavailable;
}
#endif

/**
 * Reports whether text recognition can be used in this build / on this OS.
 *
 * @return On macOS builds, the value of nsCocoaFeatures::OnCatalinaOrLater();
 *         false on every other platform.
 */
bool TextRecognition::IsSupported() {
#ifdef XP_MACOSX
  // Catalina (10.15) or higher is required because of the following API:
  // VNRecognizeTextRequest - macOS 10.15+
  // https://developer.apple.com/documentation/vision/vnrecognizetextrequest?language=objc
  const bool supported = nsCocoaFeatures::OnCatalinaOrLater();
#else
  const bool supported = false;
#endif
  return supported;
}

}  // namespace mozilla::widget