summaryrefslogtreecommitdiffstats
path: root/toolkit/components/url-classifier/Classifier.h
blob: a9af2736d2fbc28a67d56aa17ae04b270fe8a070 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef Classifier_h__
#define Classifier_h__

#include "Entries.h"
#include "HashStore.h"
#include "ProtocolParser.h"
#include "LookupCache.h"
#include "mozilla/Atomics.h"
#include "nsCOMPtr.h"
#include "nsString.h"
#include "nsIFile.h"

#include <functional>

namespace mozilla {

// Forward declaration: this header only refers to LazyIdleThread through
// RefPtr<LazyIdleThread> (see Classifier::mUpdateThread).
class LazyIdleThread;

namespace safebrowsing {

/**
 * Maintains the stores and LookupCaches for the url classifier.
 *
 * Instances are reference counted (thread-safe refcounting) and the
 * destructor is private, so callers cannot delete an instance directly.
 */
class Classifier {
 public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(Classifier);

  Classifier();

  // NOTE(review): |aCacheDirectory| is presumably what ends up in
  // mCacheDirectory ("Root dir of the Local profile") -- confirm against the
  // implementation.
  nsresult Open(nsIFile& aCacheDirectory);
  void Close();
  void Reset();  // Not including any intermediary for update.

  /**
   * Clear data for specific tables.
   * If ClearType is Clear_Cache, this function will only clear cache in lookup
   * cache, otherwise, it will clear data in lookup cache and data stored on
   * disk.
   */
  enum ClearType {
    Clear_Cache,
    Clear_All,
  };
  void ResetTables(ClearType aType, const nsTArray<nsCString>& aTables);

  /**
   * Get the list of active tables and their chunks in a format
   * suitable for an update request.
   */
  void TableRequest(nsACString& aResult);

  /**
   * Get all tables that we know about.
   */
  nsresult ActiveTables(nsTArray<nsCString>& aTables) const;

  /**
   * Check URL fragments against a specified table.
   * The fragments should be generated by |LookupCache::GetLookupFragments|.
   */
  nsresult CheckURIFragments(const nsTArray<nsCString>& aSpecFragments,
                             const nsACString& table,
                             LookupResultArray& aResults);

  /**
   * Asynchronously apply updates to the in-use databases. When the
   * update is complete, the caller can be notified by |aCallback|, which
   * will occur on the caller thread.
   *
   * The callback receives the nsresult of the update.
   */
  using AsyncUpdateCallback = std::function<void(nsresult)>;
  nsresult AsyncApplyUpdates(const TableUpdateArray& aUpdates,
                             const AsyncUpdateCallback& aCallback);

  /**
   * Wait until the ongoing async update is finished and callback
   * is fired. Once this function returns, AsyncApplyUpdates is
   * no longer available.
   */
  void FlushAndDisableAsyncUpdate();

  /**
   * Apply full hashes retrieved from gethash to cache.
   */
  nsresult ApplyFullHashes(ConstTableUpdateArray& aUpdates);

  /**
   * Get a bunch of extra prefixes to query for completion
   * and mask the real entry being requested.
   */
  nsresult ReadNoiseEntries(const Prefix& aPrefix, const nsACString& aTableName,
                            uint32_t aCount, PrefixArray& aNoiseEntries);

  // Only declared when MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES is defined.
#ifdef MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES
  nsresult DumpRawTableUpdates(const nsACString& aRawUpdates);
#endif

  // Presumably splits |str| into individual table names stored in |tables|.
  // NOTE(review): the delimiter and whether |tables| is cleared first are
  // defined by the implementation -- confirm there.
  static void SplitTables(const nsACString& str, nsTArray<nsCString>& tables);

  // Given a root store directory, return a private store directory
  // based on the table name. To avoid migration issues, the private
  // store directory is only different from root directory for V4 tables.
  //
  // For V4 tables (suffixed by '-proto'), the private directory would
  // be [root directory path]/[provider]. The provider of V4 tables is
  // 'google4'.
  //
  // Note that if the table name is not owned by any provider, just use
  // the root directory.
  static nsresult GetPrivateStoreDirectory(nsIFile* aRootStoreDirectory,
                                           const nsACString& aTableName,
                                           const nsACString& aProvider,
                                           nsIFile** aPrivateStoreDirectory);

  // Swap in in-memory and on-disk database and remove all
  // update intermediaries.
  nsresult SwapInNewTablesAndCleanup();

  // Look up the LookupCache for |aTable|. |aForUpdate| defaults to false;
  // GetLookupCacheForUpdate() below is the wrapper that passes true.
  // NOTE(review): presumably |aForUpdate| selects mNewLookupCaches instead of
  // mLookupCaches -- confirm against the implementation.
  RefPtr<LookupCache> GetLookupCache(const nsACString& aTable,
                                     bool aForUpdate = false);

  // Out-param style getter: the result for |aTable| is returned via |aCache|.
  void GetCacheInfo(const nsACString& aTable,
                    nsIUrlClassifierCacheInfo** aCache);

  // NOTE(review): presumably reports whether the caller is running on
  // mUpdateThread -- confirm against the implementation.
  bool OnUpdateThread() const;

 private:
  // Private so that destruction only happens through the refcounting
  // machinery declared above.
  ~Classifier();

  void DropStores();
  void DeleteTables(nsIFile* aDirectory, const nsTArray<nsCString>& aTables);

  nsresult CreateStoreDirectory();
  nsresult SetupPathNames();
  nsresult RecoverBackups();
  nsresult CleanToDelete();
  nsresult CopyInUseDirForUpdate();
  nsresult CopyDirectoryInterruptible(nsCOMPtr<nsIFile>& aDestDir,
                                      nsCOMPtr<nsIFile>& aSourceDir);
  nsresult RegenActiveTables();

  void MergeNewLookupCaches();  // Merge mNewLookupCaches into mLookupCaches.

  void CopyAndInvalidateFullHashCache();

  // Remove any intermediary for update, including in-memory
  // and on-disk data.
  void RemoveUpdateIntermediaries();

  // Only declared when MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES is defined.
  // NOTE(review): "Directroy" in the name below is a misspelling of
  // "Directory"; renaming it requires updating the out-of-line definition and
  // any callers, which are not visible from this header.
#ifdef MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES
  already_AddRefed<nsIFile> GetFailedUpdateDirectroy();
  nsresult DumpFailedUpdate();
#endif

  nsresult ScanStoreDir(nsIFile* aDirectory,
                        const nsTArray<nsCString>& aExtensions,
                        nsTArray<nsCString>& aTables);

  nsresult UpdateHashStore(TableUpdateArray& aUpdates,
                           const nsACString& aTable);

  nsresult UpdateTableV4(TableUpdateArray& aUpdates, const nsACString& aTable);

  nsresult UpdateCache(RefPtr<const TableUpdate> aUpdates);

  // Convenience wrapper: GetLookupCache() with aForUpdate set to true.
  RefPtr<LookupCache> GetLookupCacheForUpdate(const nsACString& aTable) {
    return GetLookupCache(aTable, true);
  }

  RefPtr<LookupCache> GetLookupCacheFrom(const nsACString& aTable,
                                         LookupCacheArray& aLookupCaches,
                                         nsIFile* aRootStoreDirectory);

  bool CheckValidUpdate(TableUpdateArray& aUpdates, const nsACString& aTable);

  nsresult LoadHashStore(nsIFile* aDirectory, nsACString& aResult,
                         nsTArray<nsCString>& aFailedTableNames);

  nsresult LoadMetadata(nsIFile* aDirectory, nsACString& aResult,
                        nsTArray<nsCString>& aFailedTableNames);

  static nsCString GetProvider(const nsACString& aTableName);

  /**
   * The "background" part of ApplyUpdates. Once the background update
   * is called, the foreground update has to be called along with the
   * background result no matter whether the background update is
   * successful or not.
   */
  nsresult ApplyUpdatesBackground(TableUpdateArray& aUpdates,
                                  nsTArray<nsCString>& aFailedTableNames);

  /**
   * The "foreground" part of ApplyUpdates. The in-use data (in-memory and
   * on-disk) will be touched so this MUST be mutually exclusive to other
   * member functions.
   *
   * If |aBackgroundRv| is successful, the return value is the result of
   * bringing stuff to the foreground. Otherwise, the foreground table may
   * be reset according to the reason the background update failed, and
   * |aBackgroundRv| will be returned to forward the background update result.
   */
  nsresult ApplyUpdatesForeground(nsresult aBackgroundRv,
                                  const nsTArray<nsCString>& aFailedTableNames);

  // Used by worker thread and update thread to abort current operation.
  bool ShouldAbort() const;

  // Add built-in entries for testing.
  nsresult AddMozEntries(nsTArray<nsCString>& aTables);

  // Remove test files if they exist.
  // NOTE(review): the function name refers to "legacy" files rather than test
  // files -- confirm which is accurate against the implementation.
  nsresult ClearLegacyFiles();

  // Root dir of the Local profile.
  nsCOMPtr<nsIFile> mCacheDirectory;
  // Main directory where to store the databases.
  nsCOMPtr<nsIFile> mRootStoreDirectory;
  // Used for atomically updating the other dirs.
  nsCOMPtr<nsIFile> mBackupDirectory;
  nsCOMPtr<nsIFile> mUpdatingDirectory;  // For update only.
  nsCOMPtr<nsIFile> mToDeleteDirectory;
  LookupCacheArray mLookupCaches;  // For query only.
  nsTArray<nsCString> mActiveTablesCache;
  uint32_t mHashKey;

  // In-memory cache for the result of TableRequest. See
  // nsIUrlClassifierDBService.getTables for the format.
  nsCString mTableRequestResult;

  // Whether mTableRequestResult is outdated and needs to
  // be reloaded from disk.
  bool mIsTableRequestResultOutdated;

  // The copy of mLookupCaches for update only.
  LookupCacheArray mNewLookupCaches;

  // True when Reset() is called.
  bool mUpdateInterrupted;

  // True once Close() has been called. Atomic, unlike mUpdateInterrupted.
  Atomic<bool> mIsClosed;

  RefPtr<LazyIdleThread> mUpdateThread;  // For async update.

  // Identical to mRootStoreDirectory but for update only because
  // nsIFile is not thread safe and mRootStoreDirectory needs to
  // be accessed in CopyInUseDirForUpdate().
  // It will be initialized right before update on the worker thread.
  nsCOMPtr<nsIFile> mRootStoreDirectoryForUpdate;
};

}  // namespace safebrowsing
}  // namespace mozilla

#endif