diff options
Diffstat (limited to 'third_party/content_analysis_sdk')
12 files changed, 755 insertions, 365 deletions
diff --git a/third_party/content_analysis_sdk/.gitignore b/third_party/content_analysis_sdk/.gitignore deleted file mode 100644 index 0ab461830e..0000000000 --- a/third_party/content_analysis_sdk/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -.vscode/ -.ccls-cache/ -.cache/ -build/ -*.bak -*.swp diff --git a/third_party/content_analysis_sdk/agent/src/event_win.cc b/third_party/content_analysis_sdk/agent/src/event_win.cc index 907bdfb858..99b4233237 100644 --- a/third_party/content_analysis_sdk/agent/src/event_win.cc +++ b/third_party/content_analysis_sdk/agent/src/event_win.cc @@ -3,7 +3,6 @@ // found in the LICENSE file. #include <ios> -#include <iostream> #include <sstream> #include <utility> diff --git a/third_party/content_analysis_sdk/agent_improvements.patch b/third_party/content_analysis_sdk/agent_improvements.patch new file mode 100644 index 0000000000..c1475caded --- /dev/null +++ b/third_party/content_analysis_sdk/agent_improvements.patch @@ -0,0 +1,480 @@ +commit 4ad63eb3aa65ce7baa08190aac2770540dc25f43 +Author: Greg Stoll <gstoll@mozilla.com> +Date: Wed, 27 Mar 2024 12:13:56 -0500 + + Mozilla improvements to content_analysis_sdk + + - add ability for demo agent to block/warn/report specific regexes + - add ability for demo agent to chose a sequence of delays to apply + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 39477223f031c..5dacc81031117 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -203,6 +203,7 @@ add_executable(agent + ./demo/agent.cc + ./demo/handler.h + ) ++target_compile_features(agent PRIVATE cxx_std_17) + target_include_directories(agent PRIVATE ${AGENT_INCLUDES}) + target_link_libraries(agent PRIVATE cac_agent) + +diff --git a/agent/src/event_win.h b/agent/src/event_win.h +index 9f8b6903566f2..f631f693dcd9c 100644 +--- a/agent/src/event_win.h ++++ b/agent/src/event_win.h +@@ -28,6 +28,12 @@ class ContentAnalysisEventWin : public ContentAnalysisEventBase { + ResultCode Close() override; + ResultCode Send() override; + std::string DebugString() const override; ++ std::string SerializeStringToSendToBrowser() { ++ return agent_to_chrome()->SerializeAsString(); ++ } ++ void SetResponseSent() { response_sent_ = true; } ++ ++ HANDLE Pipe() const { return hPipe_; } + + private: + void Shutdown(); +diff --git a/browser/src/client_win.cc b/browser/src/client_win.cc +index 9d3d7e8c52662..039946d131398 100644 +--- a/browser/src/client_win.cc ++++ b/browser/src/client_win.cc +@@ -418,7 +418,11 @@ DWORD ClientWin::ConnectToPipe(const std::string& pipename, HANDLE* handle) { + + void ClientWin::Shutdown() { + if (hPipe_ != INVALID_HANDLE_VALUE) { +- FlushFileBuffers(hPipe_); ++ // TODO: This trips the LateWriteObserver. We could move this earlier ++ // (before the LateWriteObserver is created) or just remove it, although ++ // the later could mean an ACK message is not processed by the agent ++ // in time. ++ // FlushFileBuffers(hPipe_); + CloseHandle(hPipe_); + hPipe_ = INVALID_HANDLE_VALUE; + } +diff --git a/demo/agent.cc b/demo/agent.cc +index ff8b93f647ebd..3e168b0915a0c 100644 +--- a/demo/agent.cc ++++ b/demo/agent.cc +@@ -2,12 +2,18 @@ + // Use of this source code is governed by a BSD-style license that can be + // found in the LICENSE file. + ++#include <algorithm> + #include <fstream> + #include <iostream> + #include <string> ++#include <regex> ++#include <vector> + + #include "content_analysis/sdk/analysis_agent.h" + #include "demo/handler.h" ++#include "demo/handler_misbehaving.h" ++ ++using namespace content_analysis::sdk; + + // Different paths are used depending on whether this agent should run as a + // use specific agent or not. These values are chosen to match the test +@@ -19,19 +25,50 @@ constexpr char kPathSystem[] = "brcm_chrm_cas"; + std::string path = kPathSystem; + bool use_queue = false; + bool user_specific = false; +-unsigned long delay = 0; // In seconds. ++std::vector<unsigned long> delays = {0}; // In seconds. + unsigned long num_threads = 8u; + std::string save_print_data_path = ""; ++RegexArray toBlock, toWarn, toReport; ++static bool useMisbehavingHandler = false; ++static std::string modeStr; + + // Command line parameters. +-constexpr const char* kArgDelaySpecific = "--delay="; ++constexpr const char* kArgDelaySpecific = "--delays="; + constexpr const char* kArgPath = "--path="; + constexpr const char* kArgQueued = "--queued"; + constexpr const char* kArgThreads = "--threads="; + constexpr const char* kArgUserSpecific = "--user"; ++constexpr const char* kArgToBlock = "--toblock="; ++constexpr const char* kArgToWarn = "--towarn="; ++constexpr const char* kArgToReport = "--toreport="; ++constexpr const char* kArgMisbehave = "--misbehave="; + constexpr const char* kArgHelp = "--help"; + constexpr const char* kArgSavePrintRequestDataTo = "--save-print-request-data-to="; + ++std::map<std::string, Mode> sStringToMode = { ++#define AGENT_MODE(name) {#name, Mode::Mode_##name}, ++#include "modes.h" ++#undef AGENT_MODE ++}; ++ ++std::map<Mode, std::string> sModeToString = { ++#define AGENT_MODE(name) {Mode::Mode_##name, #name}, ++#include "modes.h" ++#undef AGENT_MODE ++}; ++ ++std::vector<std::pair<std::string, std::regex>> ++ParseRegex(const std::string str) { ++ std::vector<std::pair<std::string, std::regex>> ret; ++ for (auto it = str.begin(); it != str.end(); /* nop */) { ++ auto it2 = std::find(it, str.end(), ','); ++ ret.push_back(std::make_pair(std::string(it, it2), std::regex(it, it2))); ++ it = it2 == str.end() ? it2 : it2 + 1; ++ } ++ ++ return ret; ++} ++ + bool ParseCommandLine(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + const std::string arg = argv[i]; +@@ -44,16 +81,38 @@ bool ParseCommandLine(int argc, char* argv[]) { + path = kPathUser; + user_specific = true; + } else if (arg.find(kArgDelaySpecific) == 0) { +- delay = std::stoul(arg.substr(strlen(kArgDelaySpecific))); ++ std::string delaysStr = arg.substr(strlen(kArgDelaySpecific)); ++ delays.clear(); ++ size_t posStart = 0, posEnd; ++ unsigned long delay; ++ while ((posEnd = delaysStr.find(',', posStart)) != std::string::npos) { ++ delay = std::stoul(delaysStr.substr(posStart, posEnd - posStart)); ++ if (delay > 30) { ++ delay = 30; ++ } ++ delays.push_back(delay); ++ posStart = posEnd + 1; ++ } ++ delay = std::stoul(delaysStr.substr(posStart)); + if (delay > 30) { + delay = 30; + } ++ delays.push_back(delay); + } else if (arg.find(kArgPath) == 0) { + path = arg.substr(strlen(kArgPath)); + } else if (arg.find(kArgQueued) == 0) { + use_queue = true; + } else if (arg.find(kArgThreads) == 0) { + num_threads = std::stoul(arg.substr(strlen(kArgThreads))); ++ } else if (arg.find(kArgToBlock) == 0) { ++ toBlock = ParseRegex(arg.substr(strlen(kArgToBlock))); ++ } else if (arg.find(kArgToWarn) == 0) { ++ toWarn = ParseRegex(arg.substr(strlen(kArgToWarn))); ++ } else if (arg.find(kArgToReport) == 0) { ++ toReport = ParseRegex(arg.substr(strlen(kArgToReport))); ++ } else if (arg.find(kArgMisbehave) == 0) { ++ modeStr = arg.substr(strlen(kArgMisbehave)); ++ useMisbehavingHandler = true; + } else if (arg.find(kArgHelp) == 0) { + return false; + } else if (arg.find(kArgSavePrintRequestDataTo) == 0) { +@@ -72,13 +131,17 @@ void PrintHelp() { + << "A simple agent to process content analysis requests." << std::endl + << "Data containing the string 'block' blocks the request data from being used." << std::endl + << std::endl << "Options:" << std::endl +- << kArgDelaySpecific << "<delay> : Add a delay to request processing in seconds (max 30)." << std::endl ++ << kArgDelaySpecific << "<delay1,delay2,...> : Add delays to request processing in seconds. Delays are limited to 30 seconds and are applied round-robin to requests. Default is 0." << std::endl + << kArgPath << " <path> : Used the specified path instead of default. Must come after --user." << std::endl + << kArgQueued << " : Queue requests for processing in a background thread" << std::endl + << kArgThreads << " : When queued, number of threads in the request processing thread pool" << std::endl + << kArgUserSpecific << " : Make agent OS user specific." << std::endl + << kArgHelp << " : prints this help message" << std::endl +- << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests"; ++ << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests" << std::endl ++ << kArgToBlock << "<regex> : Regular expression matching file and text content to block." << std::endl ++ << kArgToWarn << "<regex> : Regular expression matching file and text content to warn about." << std::endl ++ << kArgToReport << "<regex> : Regular expression matching file and text content to report." << std::endl ++ << kArgMisbehave << "<mode> : Use 'misbehaving' agent in given mode for testing purposes." << std::endl; + } + + int main(int argc, char* argv[]) { +@@ -87,9 +150,17 @@ int main(int argc, char* argv[]) { + return 1; + } + +- auto handler = use_queue +- ? std::make_unique<QueuingHandler>(num_threads, delay, save_print_data_path) +- : std::make_unique<Handler>(delay, save_print_data_path); ++ auto handler = ++ useMisbehavingHandler ++ ? MisbehavingHandler::Create(modeStr, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) ++ : use_queue ++ ? std::make_unique<QueuingHandler>(num_threads, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) ++ : std::make_unique<Handler>(std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)); ++ ++ if (!handler) { ++ std::cout << "[Demo] Failed to construct handler." << std::endl; ++ return 1; ++ } + + // Each agent uses a unique name to identify itself with Google Chrome. + content_analysis::sdk::ResultCode rc; +diff --git a/demo/handler.h b/demo/handler.h +index 9d1ccfdf9857a..88599963c51b0 100644 +--- a/demo/handler.h ++++ b/demo/handler.h +@@ -7,31 +7,51 @@ + + #include <time.h> + ++#include <algorithm> ++#include <atomic> + #include <chrono> + #include <cstdio> + #include <fstream> + #include <iostream> ++#include <optional> + #include <thread> + #include <utility> ++#include <regex> + #include <vector> + + #include "content_analysis/sdk/analysis_agent.h" + #include "demo/atomic_output.h" + #include "demo/request_queue.h" + ++using RegexArray = std::vector<std::pair<std::string, std::regex>>; ++ + // An AgentEventHandler that dumps requests information to stdout and blocks + // any requests that have the keyword "block" in their data + class Handler : public content_analysis::sdk::AgentEventHandler { + public: + using Event = content_analysis::sdk::ContentAnalysisEvent; + +- Handler(unsigned long delay, const std::string& print_data_file_path) : +- delay_(delay), print_data_file_path_(print_data_file_path) { +- } ++ Handler(std::vector<unsigned long>&& delays, const std::string& print_data_file_path, ++ RegexArray&& toBlock = RegexArray(), ++ RegexArray&& toWarn = RegexArray(), ++ RegexArray&& toReport = RegexArray()) : ++ toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)), ++ delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {} + +- unsigned long delay() { return delay_; } ++ const std::vector<unsigned long> delays() { return delays_; } ++ size_t nextDelayIndex() const { return nextDelayIndex_; } + + protected: ++ // subclasses can override this ++ // returns whether the response has been set ++ virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) { ++ return false; ++ } ++ // subclasses can override this ++ // returns whether the response has been sent ++ virtual bool SendCustomResponse(std::unique_ptr<Event>& event) { ++ return false; ++ } + // Analyzes one request from Google Chrome and responds back to the browser + // with either an allow or block verdict. + void AnalyzeContent(AtomicCout& aout, std::unique_ptr<Event> event) { +@@ -43,29 +63,25 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + + DumpEvent(aout.stream(), event.get()); + +- bool block = false; + bool success = true; +- unsigned long delay = delay_; +- +- if (event->GetRequest().has_text_content()) { +- block = ShouldBlockRequest( +- event->GetRequest().text_content()); +- GetFileSpecificDelay(event->GetRequest().text_content(), &delay); +- } else if (event->GetRequest().has_file_path()) { +- std::string content; +- success = +- ReadContentFromFile(event->GetRequest().file_path(), +- &content); +- if (success) { +- block = ShouldBlockRequest(content); +- GetFileSpecificDelay(content, &delay); ++ std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse; ++ bool setResponse = SetCustomResponse(aout, event); ++ if (!setResponse) { ++ caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; ++ if (event->GetRequest().has_text_content()) { ++ caResponse = DecideCAResponse( ++ event->GetRequest().text_content(), aout.stream()); ++ } else if (event->GetRequest().has_file_path()) { ++ // TODO: Fix downloads to store file *first* so we can check contents. ++ // Until then, just check the file name: ++ caResponse = DecideCAResponse( ++ event->GetRequest().file_path(), aout.stream()); ++ } else if (event->GetRequest().has_print_data()) { ++ // In the case of print request, normally the PDF bytes would be parsed ++ // for sensitive data violations. To keep this class simple, only the ++ // URL is checked for the word "block". ++ caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream()); + } +- } else if (event->GetRequest().has_print_data()) { +- // In the case of print request, normally the PDF bytes would be parsed +- // for sensitive data violations. To keep this class simple, only the +- // URL is checked for the word "block". +- block = ShouldBlockRequest(event->GetRequest().request_data().url()); +- GetFileSpecificDelay(event->GetRequest().request_data().url(), &delay); + } + + if (!success) { +@@ -75,22 +91,44 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE); + aout.stream() << " Verdict: failed to reach verdict: "; + aout.stream() << event->DebugString() << std::endl; +- } else if (block) { +- auto rc = content_analysis::sdk::SetEventVerdictToBlock(event.get()); +- aout.stream() << " Verdict: block"; +- if (rc != content_analysis::sdk::ResultCode::OK) { +- aout.stream() << " error: " +- << content_analysis::sdk::ResultCodeToString(rc) << std::endl; +- aout.stream() << " " << event->DebugString() << std::endl; ++ } else { ++ aout.stream() << " Verdict: "; ++ if (caResponse) { ++ switch (caResponse.value()) { ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK: ++ aout.stream() << "BLOCK"; ++ break; ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN: ++ aout.stream() << "WARN"; ++ break; ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY: ++ aout.stream() << "REPORT_ONLY"; ++ break; ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED: ++ aout.stream() << "ACTION_UNSPECIFIED"; ++ break; ++ default: ++ aout.stream() << "<error>"; ++ break; ++ } ++ auto rc = ++ content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value()); ++ if (rc != content_analysis::sdk::ResultCode::OK) { ++ aout.stream() << " error: " ++ << content_analysis::sdk::ResultCodeToString(rc) << std::endl; ++ aout.stream() << " " << event->DebugString() << std::endl; ++ } ++ aout.stream() << std::endl; ++ } else { ++ aout.stream() << " Verdict: allow" << std::endl; + } + aout.stream() << std::endl; +- } else { +- aout.stream() << " Verdict: allow" << std::endl; + } +- + aout.stream() << std::endl; + + // If a delay is specified, wait that much. ++ size_t nextDelayIndex = nextDelayIndex_.fetch_add(1); ++ unsigned long delay = delays_[nextDelayIndex % delays_.size()]; + if (delay > 0) { + aout.stream() << "Delaying response to " << event->GetRequest().request_token() + << " for " << delay << "s" << std::endl<< std::endl; +@@ -99,16 +137,19 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + } + + // Send the response back to Google Chrome. +- auto rc = event->Send(); +- if (rc != content_analysis::sdk::ResultCode::OK) { +- aout.stream() << "[Demo] Error sending response: " +- << content_analysis::sdk::ResultCodeToString(rc) +- << std::endl; +- aout.stream() << event->DebugString() << std::endl; ++ bool sentCustomResponse = SendCustomResponse(event); ++ if (!sentCustomResponse) { ++ auto rc = event->Send(); ++ if (rc != content_analysis::sdk::ResultCode::OK) { ++ aout.stream() << "[Demo] Error sending response: " ++ << content_analysis::sdk::ResultCodeToString(rc) ++ << std::endl; ++ aout.stream() << event->DebugString() << std::endl; ++ } + } + } + +- private: ++ protected: + void OnBrowserConnected( + const content_analysis::sdk::BrowserInfo& info) override { + AtomicCout aout; +@@ -362,21 +403,40 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + return true; + } + +- bool ShouldBlockRequest(const std::string& content) { +- // Determines if the request should be blocked. For this simple example +- // the content is blocked if the string "block" is found. Otherwise the +- // content is allowed. +- return content.find("block") != std::string::npos; +- } +- +- void GetFileSpecificDelay(const std::string& content, unsigned long* delay) { +- auto pos = content.find("delay="); +- if (pos != std::string::npos) { +- std::sscanf(content.substr(pos).c_str(), "delay=%lu", delay); ++ std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> ++ DecideCAResponse(const std::string& content, std::stringstream& stream) { ++ for (auto& r : toBlock_) { ++ if (std::regex_search(content, r.second)) { ++ stream << "'" << content << "' matches BLOCK regex '" ++ << r.first << "'" << std::endl; ++ return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; ++ } + } ++ for (auto& r : toWarn_) { ++ if (std::regex_search(content, r.second)) { ++ stream << "'" << content << "' matches WARN regex '" ++ << r.first << "'" << std::endl; ++ return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN; ++ } ++ } ++ for (auto& r : toReport_) { ++ if (std::regex_search(content, r.second)) { ++ stream << "'" << content << "' matches REPORT_ONLY regex '" ++ << r.first << "'" << std::endl; ++ return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY; ++ } ++ } ++ stream << "'" << content << "' was ALLOWed\n"; ++ return {}; + } + +- unsigned long delay_; ++ // For the demo, block any content that matches these wildcards. ++ RegexArray toBlock_; ++ RegexArray toWarn_; ++ RegexArray toReport_; ++ ++ std::vector<unsigned long> delays_; ++ std::atomic<size_t> nextDelayIndex_; + std::string print_data_file_path_; + }; + +@@ -384,8 +444,11 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + // any requests that have the keyword "block" in their data + class QueuingHandler : public Handler { + public: +- QueuingHandler(unsigned long threads, unsigned long delay, const std::string& print_data_file_path) +- : Handler(delay, print_data_file_path) { ++ QueuingHandler(unsigned long threads, std::vector<unsigned long>&& delays, const std::string& print_data_file_path, ++ RegexArray&& toBlock = RegexArray(), ++ RegexArray&& toWarn = RegexArray(), ++ RegexArray&& toReport = RegexArray()) ++ : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) { + StartBackgroundThreads(threads); + } + +@@ -421,6 +484,8 @@ class QueuingHandler : public Handler { + aout.stream() << std::endl << "----------" << std::endl; + aout.stream() << "Thread: " << std::this_thread::get_id() + << std::endl; ++ aout.stream() << "Delaying request processing for " ++ << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "s" << std::endl << std::endl; + aout.flush(); + + handler->AnalyzeContent(aout, std::move(event)); +-- +2.42.0.windows.2 + diff --git a/third_party/content_analysis_sdk/browser/src/client_mac.cc b/third_party/content_analysis_sdk/browser/src/client_mac.cc index c6f5a798c1..fd0902e516 100644 --- a/third_party/content_analysis_sdk/browser/src/client_mac.cc +++ b/third_party/content_analysis_sdk/browser/src/client_mac.cc @@ -11,7 +11,7 @@ namespace sdk { // static std::unique_ptr<Client> Client::Create(Config config) { - return std::make_unique<ClientMac>(std::move(config)); + return nullptr; } ClientMac::ClientMac(Config config) : ClientBase(std::move(config)) {} @@ -30,4 +30,4 @@ int ClientMac::CancelRequests(const ContentAnalysisCancelRequests& cancel) { } } // namespace sdk -} // namespace content_analysis
\ No newline at end of file +} // namespace content_analysis diff --git a/third_party/content_analysis_sdk/browser/src/client_posix.cc b/third_party/content_analysis_sdk/browser/src/client_posix.cc index 14277724fd..bd62b845a0 100644 --- a/third_party/content_analysis_sdk/browser/src/client_posix.cc +++ b/third_party/content_analysis_sdk/browser/src/client_posix.cc @@ -11,7 +11,7 @@ namespace sdk { // static std::unique_ptr<Client> Client::Create(Config config) { - return std::make_unique<ClientPosix>(std::move(config)); + return nullptr; } ClientPosix::ClientPosix(Config config) : ClientBase(std::move(config)) {} diff --git a/third_party/content_analysis_sdk/demo/agent.cc b/third_party/content_analysis_sdk/demo/agent.cc index c3640018e6..3e168b0915 100644 --- a/third_party/content_analysis_sdk/demo/agent.cc +++ b/third_party/content_analysis_sdk/demo/agent.cc @@ -136,12 +136,12 @@ void PrintHelp() { << kArgQueued << " : Queue requests for processing in a background thread" << std::endl << kArgThreads << " : When queued, number of threads in the request processing thread pool" << std::endl << kArgUserSpecific << " : Make agent OS user specific." << std::endl + << kArgHelp << " : prints this help message" << std::endl << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests" << std::endl << kArgToBlock << "<regex> : Regular expression matching file and text content to block." << std::endl << kArgToWarn << "<regex> : Regular expression matching file and text content to warn about." << std::endl << kArgToReport << "<regex> : Regular expression matching file and text content to report." << std::endl - << kArgMisbehave << "<mode> : Use 'misbehaving' agent in given mode for testing purposes." << std::endl - << kArgHelp << " : prints this help message" << std::endl; + << kArgMisbehave << "<mode> : Use 'misbehaving' agent in given mode for testing purposes." << std::endl; } int main(int argc, char* argv[]) { @@ -150,10 +150,9 @@ int main(int argc, char* argv[]) { return 1; } - // TODO: Add toBlock, toWarn, toReport to QueueingHandler auto handler = useMisbehavingHandler - ? MisbehavingHandler::Create(delays[0], modeStr) + ? MisbehavingHandler::Create(modeStr, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) : use_queue ? std::make_unique<QueuingHandler>(num_threads, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) : std::make_unique<Handler>(std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)); diff --git a/third_party/content_analysis_sdk/demo/client.cc b/third_party/content_analysis_sdk/demo/client.cc index 5e47fca57f..84ca6e2356 100644 --- a/third_party/content_analysis_sdk/demo/client.cc +++ b/third_party/content_analysis_sdk/demo/client.cc @@ -317,7 +317,7 @@ void HandleRequest(const ContentAnalysisRequest& request) { global_final_action = final_action; } else { int err = client->Acknowledge( - BuildAcknowledgement(request.request_token(), final_action)); + BuildAcknowledgement(response.request_token(), final_action)); if (err != 0) { aout.stream() << "[Demo] Error sending ack " << request.request_token() << std::endl; diff --git a/third_party/content_analysis_sdk/demo/handler.h b/third_party/content_analysis_sdk/demo/handler.h index 1c9871bd08..88599963c5 100644 --- a/third_party/content_analysis_sdk/demo/handler.h +++ b/third_party/content_analysis_sdk/demo/handler.h @@ -10,6 +10,7 @@ #include <algorithm> #include <atomic> #include <chrono> +#include <cstdio> #include <fstream> #include <iostream> #include <optional> @@ -41,34 +42,46 @@ class Handler : public content_analysis::sdk::AgentEventHandler { size_t nextDelayIndex() const { return nextDelayIndex_; } protected: + // subclasses can override this + // returns whether the response has been set + virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) { + return false; + } + // subclasses can override this + // returns whether the response has been sent + virtual bool SendCustomResponse(std::unique_ptr<Event>& event) { + return false; + } // Analyzes one request from Google Chrome and responds back to the browser // with either an allow or block verdict. - void AnalyzeContent(std::stringstream& stream, std::unique_ptr<Event> event) { + void AnalyzeContent(AtomicCout& aout, std::unique_ptr<Event> event) { // An event represents one content analysis request and response triggered // by a user action in Google Chrome. The agent determines whether the // user is allowed to perform the action by examining event->GetRequest(). // The verdict, which can be "allow" or "block" is written into // event->GetResponse(). - DumpEvent(stream, event.get()); + DumpEvent(aout.stream(), event.get()); bool success = true; - std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse = - content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; - - if (event->GetRequest().has_text_content()) { - caResponse = DecideCAResponse( - event->GetRequest().text_content(), stream); - } else if (event->GetRequest().has_file_path()) { - // TODO: Fix downloads to store file *first* so we can check contents. - // Until then, just check the file name: - caResponse = DecideCAResponse( - event->GetRequest().file_path(), stream); - } else if (event->GetRequest().has_print_data()) { - // In the case of print request, normally the PDF bytes would be parsed - // for sensitive data violations. To keep this class simple, only the - // URL is checked for the word "block". - caResponse = DecideCAResponse(event->GetRequest().request_data().url(), stream); + std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse; + bool setResponse = SetCustomResponse(aout, event); + if (!setResponse) { + caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; + if (event->GetRequest().has_text_content()) { + caResponse = DecideCAResponse( + event->GetRequest().text_content(), aout.stream()); + } else if (event->GetRequest().has_file_path()) { + // TODO: Fix downloads to store file *first* so we can check contents. + // Until then, just check the file name: + caResponse = DecideCAResponse( + event->GetRequest().file_path(), aout.stream()); + } else if (event->GetRequest().has_print_data()) { + // In the case of print request, normally the PDF bytes would be parsed + // for sensitive data violations. To keep this class simple, only the + // URL is checked for the word "block". + caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream()); + } } if (!success) { @@ -76,61 +89,67 @@ class Handler : public content_analysis::sdk::AgentEventHandler { event->GetResponse(), std::string(), content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE); - stream << " Verdict: failed to reach verdict: "; - stream << event->DebugString() << std::endl; + aout.stream() << " Verdict: failed to reach verdict: "; + aout.stream() << event->DebugString() << std::endl; } else { - stream << " Verdict: "; + aout.stream() << " Verdict: "; if (caResponse) { switch (caResponse.value()) { case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK: - stream << "BLOCK"; + aout.stream() << "BLOCK"; break; case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN: - stream << "WARN"; + aout.stream() << "WARN"; break; case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY: - stream << "REPORT_ONLY"; + aout.stream() << "REPORT_ONLY"; break; case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED: - stream << "ACTION_UNSPECIFIED"; + aout.stream() << "ACTION_UNSPECIFIED"; break; default: - stream << "<error>"; + aout.stream() << "<error>"; break; } auto rc = content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value()); if (rc != content_analysis::sdk::ResultCode::OK) { - stream << " error: " - << content_analysis::sdk::ResultCodeToString(rc) << std::endl; - stream << " " << event->DebugString() << std::endl; + aout.stream() << " error: " + << content_analysis::sdk::ResultCodeToString(rc) << std::endl; + aout.stream() << " " << event->DebugString() << std::endl; } - stream << std::endl; + aout.stream() << std::endl; } else { - stream << " Verdict: allow" << std::endl; + aout.stream() << " Verdict: allow" << std::endl; } - stream << std::endl; + aout.stream() << std::endl; } - stream << std::endl; + aout.stream() << std::endl; // If a delay is specified, wait that much. size_t nextDelayIndex = nextDelayIndex_.fetch_add(1); unsigned long delay = delays_[nextDelayIndex % delays_.size()]; if (delay > 0) { + aout.stream() << "Delaying response to " << event->GetRequest().request_token() + << " for " << delay << "s" << std::endl<< std::endl; + aout.flush(); std::this_thread::sleep_for(std::chrono::seconds(delay)); } // Send the response back to Google Chrome. - auto rc = event->Send(); - if (rc != content_analysis::sdk::ResultCode::OK) { - stream << "[Demo] Error sending response: " - << content_analysis::sdk::ResultCodeToString(rc) - << std::endl; - stream << event->DebugString() << std::endl; + bool sentCustomResponse = SendCustomResponse(event); + if (!sentCustomResponse) { + auto rc = event->Send(); + if (rc != content_analysis::sdk::ResultCode::OK) { + aout.stream() << "[Demo] Error sending response: " + << content_analysis::sdk::ResultCodeToString(rc) + << std::endl; + aout.stream() << event->DebugString() << std::endl; + } } } - private: + protected: void OnBrowserConnected( const content_analysis::sdk::BrowserInfo& info) override { AtomicCout aout; @@ -155,7 +174,7 @@ class Handler : public content_analysis::sdk::AgentEventHandler { // In this example code, the event is handled synchronously. AtomicCout aout; aout.stream() << std::endl << "----------" << std::endl << std::endl; - AnalyzeContent(aout.stream(), std::move(event)); + AnalyzeContent(aout, std::move(event)); } void OnResponseAcknowledged( @@ -183,7 +202,7 @@ class Handler : public content_analysis::sdk::AgentEventHandler { } AtomicCout aout; - aout.stream() << "Ack: " << ack.request_token() << std::endl; + aout.stream() << " Ack: " << ack.request_token() << std::endl; aout.stream() << " Final action: " << final_action << std::endl; } void OnCancelRequests( @@ -206,31 +225,62 @@ class Handler : public content_analysis::sdk::AgentEventHandler { void DumpEvent(std::stringstream& stream, Event* event) { time_t now = time(nullptr); - stream << "Received at: " << ctime(&now); // Returned string includes \n. + stream << "Received at: " << ctime(&now); // Includes \n. + stream << "Received from: pid=" << event->GetBrowserInfo().pid + << " path=" << event->GetBrowserInfo().binary_path << std::endl; const content_analysis::sdk::ContentAnalysisRequest& request = event->GetRequest(); std::string connector = "<Unknown>"; if (request.has_analysis_connector()) { - switch (request.analysis_connector()) - { - case content_analysis::sdk::FILE_DOWNLOADED: - connector = "download"; - break; - case content_analysis::sdk::FILE_ATTACHED: - connector = "attach"; - break; - case content_analysis::sdk::BULK_DATA_ENTRY: - connector = "bulk-data-entry"; - break; - case content_analysis::sdk::PRINT: - connector = "print"; - break; - case content_analysis::sdk::FILE_TRANSFER: - connector = "file-transfer"; - break; - default: - break; + switch (request.analysis_connector()) { + case content_analysis::sdk::FILE_DOWNLOADED: + connector = "download"; + break; + case content_analysis::sdk::FILE_ATTACHED: + connector = "attach"; + break; + case content_analysis::sdk::BULK_DATA_ENTRY: + connector = "bulk-data-entry"; + break; + case content_analysis::sdk::PRINT: + connector = "print"; + break; + case content_analysis::sdk::FILE_TRANSFER: + connector = "file-transfer"; + break; + default: + break; + } + } + std::string reason; + if (request.has_reason()) { + using content_analysis::sdk::ContentAnalysisRequest; + switch (request.reason()) { + case content_analysis::sdk::ContentAnalysisRequest::UNKNOWN: + reason = "<Unknown>"; + break; + case content_analysis::sdk::ContentAnalysisRequest::CLIPBOARD_PASTE: + reason = "CLIPBOARD_PASTE"; + break; + case content_analysis::sdk::ContentAnalysisRequest::DRAG_AND_DROP: + reason = "DRAG_AND_DROP"; + break; + case content_analysis::sdk::ContentAnalysisRequest::FILE_PICKER_DIALOG: + reason = "FILE_PICKER_DIALOG"; + break; + case content_analysis::sdk::ContentAnalysisRequest::PRINT_PREVIEW_PRINT: + reason = "PRINT_PREVIEW_PRINT"; + break; + case content_analysis::sdk::ContentAnalysisRequest::SYSTEM_DIALOG_PRINT: + reason = "SYSTEM_DIALOG_PRINT"; + break; + case content_analysis::sdk::ContentAnalysisRequest::NORMAL_DOWNLOAD: + reason = "NORMAL_DOWNLOAD"; + break; + case content_analysis::sdk::ContentAnalysisRequest::SAVE_AS_DOWNLOAD: + reason = "SAVE_AS_DOWNLOAD"; + break; } } @@ -252,11 +302,7 @@ class Handler : public content_analysis::sdk::AgentEventHandler { std::string file_path = request.has_file_path() - ? request.file_path() : "<none>"; - - std::string text_content = - request.has_text_content() - ? request.text_content() : "<none>"; + ? request.file_path() : "None, bulk text entry or print"; std::string machine_user = request.has_client_metadata() && @@ -282,14 +328,35 @@ class Handler : public content_analysis::sdk::AgentEventHandler { stream << " Expires at: " << expires_at_str << " (" << secs_remaining << " seconds from now)" << std::endl; stream << " Connector: " << connector << std::endl; + if (!reason.empty()) { + stream << " Reason: " << reason << std::endl; + } stream << " URL: " << url << std::endl; stream << " Tab title: " << tab_title << std::endl; stream << " Filename: " << filename << std::endl; stream << " Digest: " << digest << std::endl; stream << " Filepath: " << file_path << std::endl; - stream << " Text content: '" << text_content << "'" << std::endl; stream << " Machine user: " << machine_user << std::endl; stream << " Email: " << email << std::endl; + + if (request.has_text_content() && !request.text_content().empty()) { + std::string prefix = " Pasted data: "; + std::string text_content = request.text_content(); + + // Truncate the text past 50 bytes to keep it to a reasonable length in + // the terminal window. + if (text_content.size() > 50) { + prefix = " Pasted data (truncated): "; + text_content = text_content.substr(0, 50) + "..."; + } + stream << prefix + << text_content + << std::endl; + stream << " Pasted data size (bytes): " + << request.text_content().size() + << std::endl; + } + if (request.has_print_data() && !print_data_file_path_.empty()) { if (request.request_data().has_print_metadata() && request.request_data().print_metadata().has_printer_name()) { @@ -415,12 +482,13 @@ class QueuingHandler : public Handler { AtomicCout aout; aout.stream() << std::endl << "----------" << std::endl; - aout.stream() << "Thread: " << std::this_thread::get_id() << std::endl; + aout.stream() << "Thread: " << std::this_thread::get_id() + << std::endl; aout.stream() << "Delaying request processing for " << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "s" << std::endl << std::endl; aout.flush(); - handler->AnalyzeContent(aout.stream(), std::move(event)); + handler->AnalyzeContent(aout, std::move(event)); } return 0; diff --git a/third_party/content_analysis_sdk/demo/handler_misbehaving.h b/third_party/content_analysis_sdk/demo/handler_misbehaving.h index d303049d98..bb0b4f18ad 100644 --- a/third_party/content_analysis_sdk/demo/handler_misbehaving.h +++ b/third_party/content_analysis_sdk/demo/handler_misbehaving.h @@ -20,6 +20,7 @@ #include "content_analysis/sdk/analysis.pb.h" #include "content_analysis/sdk/analysis_agent.h" #include "agent/src/event_win.h" +#include "handler.h" enum class Mode { // Have to use a "Mode_" prefix to avoid preprocessing problems in StringToMode @@ -93,13 +94,18 @@ static DWORD WriteBigMessageToPipe(HANDLE pipe, const std::string& message) { } // An AgentEventHandler that does various misbehaving things -class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler { +class MisbehavingHandler final : public Handler { public: using Event = content_analysis::sdk::ContentAnalysisEvent; static - std::unique_ptr<AgentEventHandler> Create(unsigned long delay, - const std::string& modeStr) { + std::unique_ptr<AgentEventHandler> Create( + const std::string& modeStr, + std::vector<unsigned long>&& delays, + const std::string& print_data_file_path, + RegexArray&& toBlock = RegexArray(), + RegexArray&& toWarn = RegexArray(), + RegexArray&& toReport = RegexArray()) { auto it = sStringToMode.find(modeStr); if (it == sStringToMode.end()) { std::cout << "\"" << modeStr << "\"" @@ -107,11 +113,17 @@ class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler return nullptr; } - return std::unique_ptr<AgentEventHandler>(new MisbehavingHandler(delay, it->second)); + return std::unique_ptr<AgentEventHandler>(new MisbehavingHandler(it->second, std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport))); } private: - MisbehavingHandler(unsigned long delay, Mode mode) : delay_(delay), mode_(mode) {} + MisbehavingHandler(Mode mode, std::vector<unsigned long>&& delays, const std::string& print_data_file_path, + RegexArray&& toBlock = RegexArray(), + RegexArray&& toWarn = RegexArray(), + RegexArray&& toReport = RegexArray()) : + Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)), + mode_(mode) {} + template <size_t N> DWORD SendBytesOverPipe(const unsigned char (&bytes)[N], @@ -124,20 +136,11 @@ class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler return WriteBigMessageToPipe(pipe, s); } - // Analyzes one request from Google Chrome and responds back to the browser - // with either an allow or block verdict. - void AnalyzeContent(std::unique_ptr<Event> event) { - // An event represents one content analysis request and response triggered - // by a user action in Google Chrome. The agent determines whether the - // user is allowed to perform the action by examining event->GetRequest(). - // The verdict, which can be "allow" or "block" is written into - // event->GetResponse(). - + bool SetCustomResponse(AtomicCout& aout, std::unique_ptr<Event>& event) override { std::cout << std::endl << "----------" << std::endl << std::endl; - - DumpRequest(event->GetRequest()); std::cout << "Mode is " << sModeToString[mode_] << std::endl; + bool handled = true; if (mode_ == Mode::Mode_largeResponse) { for (size_t i = 0; i < 1000; ++i) { content_analysis::sdk::ContentAnalysisResponse_Result* result = @@ -177,7 +180,7 @@ class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler event->GetResponse().clear_results(); } else if (mode_ == Mode::Mode_resultWithInvalidStatus) { // This causes an assertion failure and the process exits - // So we just serialize this ourselves below + // So we just serialize this ourselves in SendCustomResponse() /*content_analysis::sdk::ContentAnalysisResponse_Result* result = event->GetResponse().mutable_results(0); result->set_status( @@ -185,38 +188,12 @@ class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler ::content_analysis::sdk::ContentAnalysisResponse_Result_Status>( 100));*/ } else { - bool block = false; - - if (event->GetRequest().has_text_content()) { - block = ShouldBlockRequest(event->GetRequest().text_content()); - } else if (event->GetRequest().has_file_path()) { - block = ShouldBlockRequest(event->GetRequest().file_path()); - } - - if (block) { - auto rc = content_analysis::sdk::SetEventVerdictToBlock(event.get()); - std::cout << " Verdict: block"; - if (rc != content_analysis::sdk::ResultCode::OK) { - std::cout << " error: " - << content_analysis::sdk::ResultCodeToString(rc) - << std::endl; - std::cout << " " << event->DebugString() << std::endl; - } - std::cout << std::endl; - } else { - std::cout << " Verdict: allow" << std::endl; - } - } - - std::cout << std::endl; - - // If a delay is specified, wait that much. - if (delay_ > 0) { - std::cout << "[Demo] delaying request processing for " << delay_ << "s" - << std::endl; - std::this_thread::sleep_for(std::chrono::seconds(delay_)); + handled = false; } + return handled; + } + bool SendCustomResponse(std::unique_ptr<Event>& event) override { if (mode_ == Mode::Mode_largeResponse) { content_analysis::sdk::ContentAnalysisEventWin* eventWin = static_cast<content_analysis::sdk::ContentAnalysisEventWin*>( @@ -301,194 +278,12 @@ class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler // bit, indicating there should be a byte after this SendBytesOverPipe(bytes, event); } else { - std::cout << "(misbehaving) Handler::AnalyzeContent() about to call " - "event->Send(), mode is " - << sModeToString[mode_] << std::endl; - // Send the response back to Google Chrome. - auto rc = event->Send(); - if (rc != content_analysis::sdk::ResultCode::OK) { - std::cout << "[Demo] Error sending response: " - << content_analysis::sdk::ResultCodeToString(rc) << std::endl; - std::cout << event->DebugString() << std::endl; - } - } - } - - private: - void OnBrowserConnected( - const content_analysis::sdk::BrowserInfo& info) override { - std::cout << std::endl << "==========" << std::endl; - std::cout << "Browser connected pid=" << info.pid << std::endl; - } - - void OnBrowserDisconnected( - const content_analysis::sdk::BrowserInfo& info) override { - std::cout << std::endl - << "Browser disconnected pid=" << info.pid << std::endl; - std::cout << "==========" << std::endl; - } - - void OnAnalysisRequested(std::unique_ptr<Event> event) override { - // If the agent is capable of analyzing content in the background, the - // events may be handled in background threads. Having said that, a - // event should not be assumed to be thread safe, that is, it should not - // be accessed by more than one thread concurrently. - // - // In this example code, the event is handled synchronously. - AnalyzeContent(std::move(event)); - } - void OnResponseAcknowledged( - const content_analysis::sdk::ContentAnalysisAcknowledgement& ack) - override { - const char* final_action = "<Unknown>"; - if (ack.has_final_action()) { - switch (ack.final_action()) { - case content_analysis::sdk::ContentAnalysisAcknowledgement:: - ACTION_UNSPECIFIED: - final_action = "<Unspecified>"; - break; - case content_analysis::sdk::ContentAnalysisAcknowledgement::ALLOW: - final_action = "Allow"; - break; - case content_analysis::sdk::ContentAnalysisAcknowledgement::REPORT_ONLY: - final_action = "Report only"; - break; - case content_analysis::sdk::ContentAnalysisAcknowledgement::WARN: - final_action = "Warn"; - break; - case content_analysis::sdk::ContentAnalysisAcknowledgement::BLOCK: - final_action = "Block"; - break; - } - } - - std::cout << "Ack: " << ack.request_token() << std::endl; - std::cout << " Final action: " << final_action << std::endl; - } - void OnCancelRequests( - const content_analysis::sdk::ContentAnalysisCancelRequests& cancel) - override { - std::cout << "Cancel: " << std::endl; - std::cout << " User action ID: " << cancel.user_action_id() << std::endl; - } - - void OnInternalError(const char* context, - content_analysis::sdk::ResultCode error) override { - std::cout << std::endl - << "*ERROR*: context=\"" << context << "\" " - << content_analysis::sdk::ResultCodeToString(error) << std::endl; - } - - void DumpRequest( - const content_analysis::sdk::ContentAnalysisRequest& request) { - std::string connector = "<Unknown>"; - if (request.has_analysis_connector()) { - switch (request.analysis_connector()) { - case content_analysis::sdk::FILE_DOWNLOADED: - connector = "download"; - break; - case content_analysis::sdk::FILE_ATTACHED: - connector = "attach"; - break; - case content_analysis::sdk::BULK_DATA_ENTRY: - connector = "bulk-data-entry"; - break; - case content_analysis::sdk::PRINT: - connector = "print"; - break; - case content_analysis::sdk::FILE_TRANSFER: - connector = "file-transfer"; - break; - default: - break; - } + return false; } - - std::string url = - request.has_request_data() && request.request_data().has_url() - ? request.request_data().url() - : "<No URL>"; - - std::string tab_title = - request.has_request_data() && request.request_data().has_tab_title() - ? request.request_data().tab_title() - : "<No tab title>"; - - std::string filename = - request.has_request_data() && request.request_data().has_filename() - ? request.request_data().filename() - : "<No filename>"; - - std::string digest = - request.has_request_data() && request.request_data().has_digest() - ? request.request_data().digest() - : "<No digest>"; - - std::string file_path = - request.has_file_path() ? request.file_path() : "<none>"; - - std::string text_content = - request.has_text_content() ? request.text_content() : "<none>"; - - std::string machine_user = - request.has_client_metadata() && - request.client_metadata().has_browser() && - request.client_metadata().browser().has_machine_user() - ? request.client_metadata().browser().machine_user() - : "<No machine user>"; - - std::string email = - request.has_request_data() && request.request_data().has_email() - ? request.request_data().email() - : "<No email>"; - - time_t t = request.expires_at(); - - std::string user_action_id = request.has_user_action_id() - ? request.user_action_id() - : "<No user action id>"; - - std::cout << "Request: " << request.request_token() << std::endl; - std::cout << " User action ID: " << user_action_id << std::endl; - std::cout << " Expires at: " << ctime(&t); // Returned string includes \n. - std::cout << " Connector: " << connector << std::endl; - std::cout << " URL: " << url << std::endl; - std::cout << " Tab title: " << tab_title << std::endl; - std::cout << " Filename: " << filename << std::endl; - std::cout << " Digest: " << digest << std::endl; - std::cout << " Filepath: " << file_path << std::endl; - std::cout << " Text content: '" << text_content << "'" << std::endl; - std::cout << " Machine user: " << machine_user << std::endl; - std::cout << " Email: " << email << std::endl; - } - - bool ReadContentFromFile(const std::string& file_path, std::string* content) { - std::ifstream file(file_path, - std::ios::in | std::ios::binary | std::ios::ate); - if (!file.is_open()) return false; - - // Get file size. This example does not handle files larger than 1MB. - // Make sure content string can hold the contents of the file. - int size = file.tellg(); - if (size > 1024 * 1024) return false; - - content->resize(size + 1); - - // Read file into string. - file.seekg(0, std::ios::beg); - file.read(&(*content)[0], size); - content->at(size) = 0; return true; } - bool ShouldBlockRequest(const std::string& content) { - // Determines if the request should be blocked. (not needed for the - // misbehaving agent) - std::cout << "'" << content << "' was not blocked\n"; - return false; - } - - unsigned long delay_; + private: Mode mode_; }; diff --git a/third_party/content_analysis_sdk/moz.yaml b/third_party/content_analysis_sdk/moz.yaml new file mode 100644 index 0000000000..9d12c72924 --- /dev/null +++ b/third_party/content_analysis_sdk/moz.yaml @@ -0,0 +1,33 @@ +schema: 1 + +bugzilla: + product: Firefox + component: Data Loss Prevention + +origin: + name: Content Analysis SDK + description: SDK that DLP agents may use to interoperate with web browsers + url: https://github.com/chromium/content_analysis_sdk + release: 3d82f7523b557d0d5c75e1acf28c3deb8081ead1 (2024-04-03T14:44:34Z). + revision: 3d82f7523b557d0d5c75e1acf28c3deb8081ead1 + license: BSD-3-Clause + +vendoring: + url: https://github.com/chromium/content_analysis_sdk + source-hosting: github + exclude: + - .gitattributes + keep: + - demo/handler_misbehaving.h + - demo/modes.h + patches: + - agent_improvements.patch + +updatebot: + maintainer-phab: "#dlp-reviewers" + maintainer-bz: davidp99@gmail.com + tasks: + - type: vendoring + enabled: True + frequency: every + blocking: 1885485 diff --git a/third_party/content_analysis_sdk/prepare_build b/third_party/content_analysis_sdk/prepare_build index ce68760f0a..b61cdc42a5 100644 --- a/third_party/content_analysis_sdk/prepare_build +++ b/third_party/content_analysis_sdk/prepare_build @@ -1,48 +1,48 @@ -#!/bin/bash
-# Copyright 2022 The Chromium Authors.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# This script is meant to be run once to setup the example demo agent.
-# Run it with one command line argument: the path to a directory where the
-# demo agent will be built. This should be a directory outside the SDK
-# directory tree. By default, if no directory is supplied, a directory
-# named `build` in the project root will be used.
-#
-# Once the build is prepared, the demo binary is built using the command
-# `cmake --build <build-dir>`, where <build-dir> is the same argument given
-# to this script.
-
-set -euo pipefail
-
-export ROOT_DIR=$(realpath $(dirname $0))
-export DEMO_DIR=$(realpath $ROOT_DIR/demo)
-export PROTO_DIR=$(realpath $ROOT_DIR/proto)
-# Defaults to $ROOT_DIR/build if no argument is provided.
-export BUILD_DIR=$(realpath ${1:-$ROOT_DIR/build})
-
-echo Root dir: $ROOT_DIR
-echo Build dir: $BUILD_DIR
-echo Demo dir: $DEMO_DIR
-echo Proto dir: $PROTO_DIR
-
-# Prepare build directory
-mkdir -p $BUILD_DIR
-# Prepare protobuf out directory
-mkdir -p $BUILD_DIR/gen
-# Enter build directory
-cd $BUILD_DIR
-
-# Install vcpkg and use it to install Google Protocol Buffers.
-test -d vcpkg || (
- git clone https://github.com/microsoft/vcpkg
- ./vcpkg/bootstrap-vcpkg.sh -disableMetrics
-)
-# Install any packages we want from vcpkg.
-./vcpkg/vcpkg install protobuf
-./vcpkg/vcpkg install gtest
-
-# Generate the build files.
-export CMAKE_TOOLCHAIN_FILE=./vcpkg/scripts/buildsystems/vcpkg.cmake
-cmake $ROOT_DIR
-
+#!/bin/bash +# Copyright 2022 The Chromium Authors. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# This script is meant to be run once to setup the example demo agent. +# Run it with one command line argument: the path to a directory where the +# demo agent will be built. This should be a directory outside the SDK +# directory tree. By default, if no directory is supplied, a directory +# named `build` in the project root will be used. +# +# Once the build is prepared, the demo binary is built using the command +# `cmake --build <build-dir>`, where <build-dir> is the same argument given +# to this script. + +set -euo pipefail + +export ROOT_DIR=$(realpath $(dirname $0)) +export DEMO_DIR=$(realpath $ROOT_DIR/demo) +export PROTO_DIR=$(realpath $ROOT_DIR/proto) +# Defaults to $ROOT_DIR/build if no argument is provided. +export BUILD_DIR=$(realpath ${1:-$ROOT_DIR/build}) + +echo Root dir: $ROOT_DIR +echo Build dir: $BUILD_DIR +echo Demo dir: $DEMO_DIR +echo Proto dir: $PROTO_DIR + +# Prepare build directory +mkdir -p $BUILD_DIR +# Prepare protobuf out directory +mkdir -p $BUILD_DIR/gen +# Enter build directory +cd $BUILD_DIR + +# Install vcpkg and use it to install Google Protocol Buffers. +test -d vcpkg || ( + git clone https://github.com/microsoft/vcpkg + ./vcpkg/bootstrap-vcpkg.sh -disableMetrics +) +# Install any packages we want from vcpkg. +./vcpkg/vcpkg install protobuf +./vcpkg/vcpkg install gtest + +# Generate the build files. +export CMAKE_TOOLCHAIN_FILE=./vcpkg/scripts/buildsystems/vcpkg.cmake +cmake $ROOT_DIR + diff --git a/third_party/content_analysis_sdk/proto/content_analysis/sdk/analysis.proto b/third_party/content_analysis_sdk/proto/content_analysis/sdk/analysis.proto index 0bbd3d4368..614b793f9b 100644 --- a/third_party/content_analysis_sdk/proto/content_analysis/sdk/analysis.proto +++ b/third_party/content_analysis_sdk/proto/content_analysis/sdk/analysis.proto @@ -156,8 +156,30 @@ message ContentAnalysisRequest { // Count of analysis requests that belong to the same user action. optional int64 user_action_requests_count = 17; + // Indicates the exact reason the request was created, ie which user action + // led to a data transfer. + enum Reason { + UNKNOWN = 0; + + // Only possible for the `FILE_ATTACHED` and `BULK_DATA_ENTRY` actions. + CLIPBOARD_PASTE = 1; + DRAG_AND_DROP = 2; + + // Only possible for the `FILE_ATTACHED` action. + FILE_PICKER_DIALOG = 3; + + // Only possible for the `PRINT` analysis connector. + PRINT_PREVIEW_PRINT = 4; + SYSTEM_DIALOG_PRINT = 5; + + // Only possible for the `FILE_DOWNLOADED` analysis connector. + NORMAL_DOWNLOAD = 6; + SAVE_AS_DOWNLOAD = 7; + } + optional Reason reason = 19; + // Reserved to make sure there is no overlap with DeepScanningClientRequest. - reserved 1 to 4, 6 to 8; + reserved 1 to 4, 6 to 8, 20; } // Verdict response sent from agent to Google Chrome. |