1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#import <Vision/Vision.h>
#include "mozilla/dom/Promise.h"
#include "mozilla/gfx/2D.h"
#include "mozilla/ErrorResult.h"
#include "ErrorList.h"
#include "nsClipboard.h"
#include "nsCocoaUtils.h"
#include "mozilla/MacStringHelpers.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/widget/TextRecognition.h"
#include "mozilla/dom/PContent.h"
namespace mozilla::widget {
// Performs text recognition ("find text in image") on aSurface using the
// macOS Vision framework. The expensive recognition work is dispatched to a
// background thread; the returned promise is resolved with the recognized
// text quads on success, or rejected with an error string on failure.
//
// @param aSurface    The image data to scan for text.
// @param aLanguages  Language tags, in priority order, used to bias
//                    recognition (passed through to Vision's
//                    recognitionLanguages).
// @return A NativePromise resolved with dom::TextRecognitionResult, or
//         rejected with an nsCString error description.
auto TextRecognition::DoFindText(gfx::DataSourceSurface& aSurface,
                                 const nsTArray<nsCString>& aLanguages)
    -> RefPtr<NativePromise> {
  NS_OBJC_BEGIN_TRY_IGNORE_BLOCK
  // TODO - Is this the most efficient path? Maybe we can write a new
  // CreateCGImageFromXXX that enables more efficient marshalling of the data.
  CGImageRef imageRef = NULL;
  nsresult rv = nsCocoaUtils::CreateCGImageFromSurface(&aSurface, &imageRef);
  if (NS_FAILED(rv) || !imageRef) {
    return NativePromise::CreateAndReject("Failed to create CGImage"_ns,
                                          __func__);
  }
  auto promise = MakeRefPtr<NativePromise::Private>(__func__);
  // NOTE: this code is not built with ARC (see the explicit release in the
  // scope exit below), so every alloc'd Objective-C object here must be
  // released manually.
  NSMutableArray* recognitionLanguages = [[NSMutableArray alloc] init];
  for (const auto& locale : aLanguages) {
    [recognitionLanguages addObject:nsCocoaUtils::ToNSString(locale)];
  }
  NS_DispatchBackgroundTask(
      NS_NewRunnableFunction(
          __func__,
          [promise, imageRef, recognitionLanguages] {
            // Balance the references handed to this task regardless of how
            // the recognition attempt ends.
            auto unrefImage = MakeScopeExit([&] {
              ::CGImageRelease(imageRef);
              [recognitionLanguages release];
            });
            dom::TextRecognitionResult result;
            dom::TextRecognitionResult* pResult = &result;
            // Define the request to use, which also handles the result. It
            // will be run below directly in this thread —
            // performRequests:error: is synchronous — so it is safe for the
            // handler to write into the stack-allocated `result` through
            // `pResult`.
            VNRecognizeTextRequest* textRecognitionRequest =
                [[VNRecognizeTextRequest alloc] initWithCompletionHandler:^(
                                                    VNRequest* _Nonnull request,
                                                    NSError* _Nullable error) {
                  NSArray<VNRecognizedTextObservation*>* observations =
                      request.results;
                  [observations enumerateObjectsUsingBlock:^(
                                    VNRecognizedTextObservation* _Nonnull obj,
                                    NSUInteger idx, BOOL* _Nonnull stop) {
                    // Requests the n top candidates for a recognized text
                    // string; we only keep the single best candidate.
                    VNRecognizedText* recognizedText =
                        [obj topCandidates:1].firstObject;
                    // https://developer.apple.com/documentation/vision/vnrecognizedtext?language=objc
                    auto& quad = *pResult->quads().AppendElement();
                    CopyNSStringToXPCOMString(recognizedText.string,
                                              quad.string());
                    quad.confidence() = recognizedText.confidence;
                    auto ToImagePoint = [](CGPoint aPoint) -> ImagePoint {
                      return {static_cast<float>(aPoint.x),
                              static_cast<float>(aPoint.y)};
                    };
                    // Store the observation's four corners in bottom-left,
                    // top-left, top-right, bottom-right order.
                    *quad.points().AppendElement() =
                        ToImagePoint(obj.bottomLeft);
                    *quad.points().AppendElement() = ToImagePoint(obj.topLeft);
                    *quad.points().AppendElement() = ToImagePoint(obj.topRight);
                    *quad.points().AppendElement() =
                        ToImagePoint(obj.bottomRight);
                  }];
                }];
            textRecognitionRequest.recognitionLevel =
                VNRequestTextRecognitionLevelAccurate;
            textRecognitionRequest.recognitionLanguages = recognitionLanguages;
            textRecognitionRequest.usesLanguageCorrection = true;
            // Send out the request. This blocks execution of this thread with
            // an expensive CPU call.
            NSError* error = nil;
            VNImageRequestHandler* requestHandler =
                [[[VNImageRequestHandler alloc] initWithCGImage:imageRef
                                                        options:@{}]
                    autorelease];
            [requestHandler performRequests:@[ textRecognitionRequest ]
                                      error:&error];
            // performRequests:error: has returned, so the request (and its
            // completion handler) is no longer needed. Without this release
            // the request object leaked on every call, since this file is
            // built without ARC.
            [textRecognitionRequest release];
            if (error != nil) {
              promise->Reject(
                  nsPrintfCString(
                      "Failed to perform text recognition request (%ld)\n",
                      error.code),
                  __func__);
            } else {
              promise->Resolve(std::move(result), __func__);
            }
          }),
      NS_DISPATCH_EVENT_MAY_BLOCK);
  return promise;
  NS_OBJC_END_TRY_IGNORE_BLOCK
}
} // namespace mozilla::widget
|