// path: root/intl/icu/source/i18n/uspoof_build.cpp (blob 098d272e1962324bcfdf261cf06b1155686a4336)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 ***************************************************************************
 * Copyright (C) 2008-2015, International Business Machines Corporation
 * and others. All Rights Reserved.
 ***************************************************************************
 *   file name:  uspoof_build.cpp
 *   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2008 Dec 8
 *   created by: Andy Heninger
 *
 *   Unicode Spoof Detection Data Builder
 *   Builder-related functions are kept in separate files so that applications not needing
 *   the builder can more easily exclude them, typically by means of static linking.
 *
 *   There are three relatively independent sets of Spoof data:
 *      Confusables,
 *      Whole Script Confusables,
 *      ID character extensions.
 *
 *   The data tables for each are built separately, each from its own definitions.
 */

#include "unicode/utypes.h"
#include "unicode/uspoof.h"
#include "unicode/unorm.h"
#include "unicode/uregex.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "uspoof_impl.h"
#include "uhash.h"
#include "uvector.h"
#include "uassert.h"
#include "uarrsort.h"
#include "uspoof_conf.h"

#if !UCONFIG_NO_NORMALIZATION

U_NAMESPACE_USE

// Forward declaration; the function is defined in uspoof.cpp and initializes
// file-static variables there. It is invoked at the top of
// uspoof_openFromSource(), which checks *status immediately afterwards and
// bails out on failure.
U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);

// The main data building function.
//
// Creates a spoof checker whose confusable data is compiled from the textual
// definitions passed in `confusables`.
//
//   confusables / confusablesLen  Source text and its length, handed unchanged
//                                 to ConfusabledataBuilder::buildConfusableData().
//   (3rd and 4th parameters)      Unnamed and ignored by this implementation.
//   errorType                     If non-null, reset to 0 before building.
//   pe                            If non-null, line/offset and both context
//                                 strings are cleared before building.
//   status                        In/out ICU error code; must not be null.
//
// Returns the new checker, or nullptr whenever *status indicates a failure.
// When regular expressions are configured out, *status is set to
// U_UNSUPPORTED_ERROR and nullptr is returned.
U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
                      const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/,
                      int32_t *errorType, UParseError *pe, UErrorCode *status) {
    uspoof_internalInitStatics(status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }
#if UCONFIG_NO_REGULAR_EXPRESSIONS
    *status = U_UNSUPPORTED_ERROR;
    return nullptr;
#else
    // Put the optional out-parameters into a known "no error" state.
    if (errorType != nullptr) {
        *errorType = 0;
    }
    if (pe != nullptr) {
        pe->line = 0;
        pe->offset = 0;
        pe->preContext[0] = 0;
        pe->postContext[0] = 0;
    }

    // Step 1: an empty data container for the checker to own.
    SpoofData *emptyData = new SpoofData(*status);
    if (emptyData == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (U_FAILURE(*status)) {
        delete emptyData;
        return nullptr;
    }

    // Step 2: the checker shell wrapped around that data.
    SpoofImpl *checker = new SpoofImpl(emptyData, *status);
    if (checker == nullptr) {
        // No SpoofImpl exists to release the data, so it is deleted here.
        *status = U_MEMORY_ALLOCATION_ERROR;
        delete emptyData;
        return nullptr;
    }
    if (U_FAILURE(*status)) {
        // The SpoofImpl destructor takes care of the data; do not delete it twice.
        delete checker;
        return nullptr;
    }

    // Step 3: compile the source (text) format into the binary tables.
    ConfusabledataBuilder::buildConfusableData(checker, confusables, confusablesLen, errorType, pe, *status);
    if (U_FAILURE(*status)) {
        delete checker;
        return nullptr;
    }
    return reinterpret_cast<USpoofChecker *>(checker);
#endif // UCONFIG_NO_REGULAR_EXPRESSIONS 
}

#endif