From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- .../content_analysis_sdk/agent_improvements.patch | 480 +++++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100644 third_party/content_analysis_sdk/agent_improvements.patch (limited to 'third_party/content_analysis_sdk/agent_improvements.patch') diff --git a/third_party/content_analysis_sdk/agent_improvements.patch b/third_party/content_analysis_sdk/agent_improvements.patch new file mode 100644 index 0000000000..c1475caded --- /dev/null +++ b/third_party/content_analysis_sdk/agent_improvements.patch @@ -0,0 +1,480 @@ +commit 4ad63eb3aa65ce7baa08190aac2770540dc25f43 +Author: Greg Stoll +Date: Wed, 27 Mar 2024 12:13:56 -0500 + + Mozilla improvements to content_analysis_sdk + + - add ability for demo agent to block/warn/report specific regexes + - add ability for demo agent to chose a sequence of delays to apply + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 39477223f031c..5dacc81031117 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -203,6 +203,7 @@ add_executable(agent + ./demo/agent.cc + ./demo/handler.h + ) ++target_compile_features(agent PRIVATE cxx_std_17) + target_include_directories(agent PRIVATE ${AGENT_INCLUDES}) + target_link_libraries(agent PRIVATE cac_agent) + +diff --git a/agent/src/event_win.h b/agent/src/event_win.h +index 9f8b6903566f2..f631f693dcd9c 100644 +--- a/agent/src/event_win.h ++++ b/agent/src/event_win.h +@@ -28,6 +28,12 @@ class ContentAnalysisEventWin : public ContentAnalysisEventBase { + ResultCode Close() override; + ResultCode Send() override; + std::string DebugString() const override; ++ std::string SerializeStringToSendToBrowser() { ++ return agent_to_chrome()->SerializeAsString(); ++ } ++ void SetResponseSent() { response_sent_ = true; } ++ ++ HANDLE Pipe() const { return hPipe_; } + + private: + void Shutdown(); +diff --git a/browser/src/client_win.cc b/browser/src/client_win.cc +index 9d3d7e8c52662..039946d131398 100644 +--- a/browser/src/client_win.cc ++++ b/browser/src/client_win.cc +@@ -418,7 +418,11 @@ DWORD ClientWin::ConnectToPipe(const std::string& pipename, HANDLE* handle) { + + void ClientWin::Shutdown() { + if (hPipe_ != INVALID_HANDLE_VALUE) { +- FlushFileBuffers(hPipe_); ++ // TODO: This trips the LateWriteObserver. We could move this earlier ++ // (before the LateWriteObserver is created) or just remove it, although ++ // the later could mean an ACK message is not processed by the agent ++ // in time. ++ // FlushFileBuffers(hPipe_); + CloseHandle(hPipe_); + hPipe_ = INVALID_HANDLE_VALUE; + } +diff --git a/demo/agent.cc b/demo/agent.cc +index ff8b93f647ebd..3e168b0915a0c 100644 +--- a/demo/agent.cc ++++ b/demo/agent.cc +@@ -2,12 +2,18 @@ + // Use of this source code is governed by a BSD-style license that can be + // found in the LICENSE file. + ++#include + #include + #include + #include ++#include ++#include + + #include "content_analysis/sdk/analysis_agent.h" + #include "demo/handler.h" ++#include "demo/handler_misbehaving.h" ++ ++using namespace content_analysis::sdk; + + // Different paths are used depending on whether this agent should run as a + // use specific agent or not. These values are chosen to match the test +@@ -19,19 +25,50 @@ constexpr char kPathSystem[] = "brcm_chrm_cas"; + std::string path = kPathSystem; + bool use_queue = false; + bool user_specific = false; +-unsigned long delay = 0; // In seconds. ++std::vector delays = {0}; // In seconds. + unsigned long num_threads = 8u; + std::string save_print_data_path = ""; ++RegexArray toBlock, toWarn, toReport; ++static bool useMisbehavingHandler = false; ++static std::string modeStr; + + // Command line parameters. +-constexpr const char* kArgDelaySpecific = "--delay="; ++constexpr const char* kArgDelaySpecific = "--delays="; + constexpr const char* kArgPath = "--path="; + constexpr const char* kArgQueued = "--queued"; + constexpr const char* kArgThreads = "--threads="; + constexpr const char* kArgUserSpecific = "--user"; ++constexpr const char* kArgToBlock = "--toblock="; ++constexpr const char* kArgToWarn = "--towarn="; ++constexpr const char* kArgToReport = "--toreport="; ++constexpr const char* kArgMisbehave = "--misbehave="; + constexpr const char* kArgHelp = "--help"; + constexpr const char* kArgSavePrintRequestDataTo = "--save-print-request-data-to="; + ++std::map sStringToMode = { ++#define AGENT_MODE(name) {#name, Mode::Mode_##name}, ++#include "modes.h" ++#undef AGENT_MODE ++}; ++ ++std::map sModeToString = { ++#define AGENT_MODE(name) {Mode::Mode_##name, #name}, ++#include "modes.h" ++#undef AGENT_MODE ++}; ++ ++std::vector> ++ParseRegex(const std::string str) { ++ std::vector> ret; ++ for (auto it = str.begin(); it != str.end(); /* nop */) { ++ auto it2 = std::find(it, str.end(), ','); ++ ret.push_back(std::make_pair(std::string(it, it2), std::regex(it, it2))); ++ it = it2 == str.end() ? it2 : it2 + 1; ++ } ++ ++ return ret; ++} ++ + bool ParseCommandLine(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + const std::string arg = argv[i]; +@@ -44,16 +81,38 @@ bool ParseCommandLine(int argc, char* argv[]) { + path = kPathUser; + user_specific = true; + } else if (arg.find(kArgDelaySpecific) == 0) { +- delay = std::stoul(arg.substr(strlen(kArgDelaySpecific))); ++ std::string delaysStr = arg.substr(strlen(kArgDelaySpecific)); ++ delays.clear(); ++ size_t posStart = 0, posEnd; ++ unsigned long delay; ++ while ((posEnd = delaysStr.find(',', posStart)) != std::string::npos) { ++ delay = std::stoul(delaysStr.substr(posStart, posEnd - posStart)); ++ if (delay > 30) { ++ delay = 30; ++ } ++ delays.push_back(delay); ++ posStart = posEnd + 1; ++ } ++ delay = std::stoul(delaysStr.substr(posStart)); + if (delay > 30) { + delay = 30; + } ++ delays.push_back(delay); + } else if (arg.find(kArgPath) == 0) { + path = arg.substr(strlen(kArgPath)); + } else if (arg.find(kArgQueued) == 0) { + use_queue = true; + } else if (arg.find(kArgThreads) == 0) { + num_threads = std::stoul(arg.substr(strlen(kArgThreads))); ++ } else if (arg.find(kArgToBlock) == 0) { ++ toBlock = ParseRegex(arg.substr(strlen(kArgToBlock))); ++ } else if (arg.find(kArgToWarn) == 0) { ++ toWarn = ParseRegex(arg.substr(strlen(kArgToWarn))); ++ } else if (arg.find(kArgToReport) == 0) { ++ toReport = ParseRegex(arg.substr(strlen(kArgToReport))); ++ } else if (arg.find(kArgMisbehave) == 0) { ++ modeStr = arg.substr(strlen(kArgMisbehave)); ++ useMisbehavingHandler = true; + } else if (arg.find(kArgHelp) == 0) { + return false; + } else if (arg.find(kArgSavePrintRequestDataTo) == 0) { +@@ -72,13 +131,17 @@ void PrintHelp() { + << "A simple agent to process content analysis requests." << std::endl + << "Data containing the string 'block' blocks the request data from being used." << std::endl + << std::endl << "Options:" << std::endl +- << kArgDelaySpecific << " : Add a delay to request processing in seconds (max 30)." << std::endl ++ << kArgDelaySpecific << " : Add delays to request processing in seconds. Delays are limited to 30 seconds and are applied round-robin to requests. Default is 0." << std::endl + << kArgPath << " : Used the specified path instead of default. Must come after --user." << std::endl + << kArgQueued << " : Queue requests for processing in a background thread" << std::endl + << kArgThreads << " : When queued, number of threads in the request processing thread pool" << std::endl + << kArgUserSpecific << " : Make agent OS user specific." << std::endl + << kArgHelp << " : prints this help message" << std::endl +- << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests"; ++ << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests" << std::endl ++ << kArgToBlock << " : Regular expression matching file and text content to block." << std::endl ++ << kArgToWarn << " : Regular expression matching file and text content to warn about." << std::endl ++ << kArgToReport << " : Regular expression matching file and text content to report." << std::endl ++ << kArgMisbehave << " : Use 'misbehaving' agent in given mode for testing purposes." << std::endl; + } + + int main(int argc, char* argv[]) { +@@ -87,9 +150,17 @@ int main(int argc, char* argv[]) { + return 1; + } + +- auto handler = use_queue +- ? std::make_unique(num_threads, delay, save_print_data_path) +- : std::make_unique(delay, save_print_data_path); ++ auto handler = ++ useMisbehavingHandler ++ ? MisbehavingHandler::Create(modeStr, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) ++ : use_queue ++ ? std::make_unique(num_threads, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) ++ : std::make_unique(std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)); ++ ++ if (!handler) { ++ std::cout << "[Demo] Failed to construct handler." << std::endl; ++ return 1; ++ } + + // Each agent uses a unique name to identify itself with Google Chrome. + content_analysis::sdk::ResultCode rc; +diff --git a/demo/handler.h b/demo/handler.h +index 9d1ccfdf9857a..88599963c51b0 100644 +--- a/demo/handler.h ++++ b/demo/handler.h +@@ -7,31 +7,51 @@ + + #include + ++#include ++#include + #include + #include + #include + #include ++#include + #include + #include ++#include + #include + + #include "content_analysis/sdk/analysis_agent.h" + #include "demo/atomic_output.h" + #include "demo/request_queue.h" + ++using RegexArray = std::vector>; ++ + // An AgentEventHandler that dumps requests information to stdout and blocks + // any requests that have the keyword "block" in their data + class Handler : public content_analysis::sdk::AgentEventHandler { + public: + using Event = content_analysis::sdk::ContentAnalysisEvent; + +- Handler(unsigned long delay, const std::string& print_data_file_path) : +- delay_(delay), print_data_file_path_(print_data_file_path) { +- } ++ Handler(std::vector&& delays, const std::string& print_data_file_path, ++ RegexArray&& toBlock = RegexArray(), ++ RegexArray&& toWarn = RegexArray(), ++ RegexArray&& toReport = RegexArray()) : ++ toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)), ++ delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {} + +- unsigned long delay() { return delay_; } ++ const std::vector delays() { return delays_; } ++ size_t nextDelayIndex() const { return nextDelayIndex_; } + + protected: ++ // subclasses can override this ++ // returns whether the response has been set ++ virtual bool SetCustomResponse(AtomicCout& aout, std::unique_ptr& event) { ++ return false; ++ } ++ // subclasses can override this ++ // returns whether the response has been sent ++ virtual bool SendCustomResponse(std::unique_ptr& event) { ++ return false; ++ } + // Analyzes one request from Google Chrome and responds back to the browser + // with either an allow or block verdict. + void AnalyzeContent(AtomicCout& aout, std::unique_ptr event) { +@@ -43,29 +63,25 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + + DumpEvent(aout.stream(), event.get()); + +- bool block = false; + bool success = true; +- unsigned long delay = delay_; +- +- if (event->GetRequest().has_text_content()) { +- block = ShouldBlockRequest( +- event->GetRequest().text_content()); +- GetFileSpecificDelay(event->GetRequest().text_content(), &delay); +- } else if (event->GetRequest().has_file_path()) { +- std::string content; +- success = +- ReadContentFromFile(event->GetRequest().file_path(), +- &content); +- if (success) { +- block = ShouldBlockRequest(content); +- GetFileSpecificDelay(content, &delay); ++ std::optional caResponse; ++ bool setResponse = SetCustomResponse(aout, event); ++ if (!setResponse) { ++ caResponse = content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; ++ if (event->GetRequest().has_text_content()) { ++ caResponse = DecideCAResponse( ++ event->GetRequest().text_content(), aout.stream()); ++ } else if (event->GetRequest().has_file_path()) { ++ // TODO: Fix downloads to store file *first* so we can check contents. ++ // Until then, just check the file name: ++ caResponse = DecideCAResponse( ++ event->GetRequest().file_path(), aout.stream()); ++ } else if (event->GetRequest().has_print_data()) { ++ // In the case of print request, normally the PDF bytes would be parsed ++ // for sensitive data violations. To keep this class simple, only the ++ // URL is checked for the word "block". ++ caResponse = DecideCAResponse(event->GetRequest().request_data().url(), aout.stream()); + } +- } else if (event->GetRequest().has_print_data()) { +- // In the case of print request, normally the PDF bytes would be parsed +- // for sensitive data violations. To keep this class simple, only the +- // URL is checked for the word "block". +- block = ShouldBlockRequest(event->GetRequest().request_data().url()); +- GetFileSpecificDelay(event->GetRequest().request_data().url(), &delay); + } + + if (!success) { +@@ -75,22 +91,44 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE); + aout.stream() << " Verdict: failed to reach verdict: "; + aout.stream() << event->DebugString() << std::endl; +- } else if (block) { +- auto rc = content_analysis::sdk::SetEventVerdictToBlock(event.get()); +- aout.stream() << " Verdict: block"; +- if (rc != content_analysis::sdk::ResultCode::OK) { +- aout.stream() << " error: " +- << content_analysis::sdk::ResultCodeToString(rc) << std::endl; +- aout.stream() << " " << event->DebugString() << std::endl; ++ } else { ++ aout.stream() << " Verdict: "; ++ if (caResponse) { ++ switch (caResponse.value()) { ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK: ++ aout.stream() << "BLOCK"; ++ break; ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN: ++ aout.stream() << "WARN"; ++ break; ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY: ++ aout.stream() << "REPORT_ONLY"; ++ break; ++ case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED: ++ aout.stream() << "ACTION_UNSPECIFIED"; ++ break; ++ default: ++ aout.stream() << ""; ++ break; ++ } ++ auto rc = ++ content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value()); ++ if (rc != content_analysis::sdk::ResultCode::OK) { ++ aout.stream() << " error: " ++ << content_analysis::sdk::ResultCodeToString(rc) << std::endl; ++ aout.stream() << " " << event->DebugString() << std::endl; ++ } ++ aout.stream() << std::endl; ++ } else { ++ aout.stream() << " Verdict: allow" << std::endl; + } + aout.stream() << std::endl; +- } else { +- aout.stream() << " Verdict: allow" << std::endl; + } +- + aout.stream() << std::endl; + + // If a delay is specified, wait that much. ++ size_t nextDelayIndex = nextDelayIndex_.fetch_add(1); ++ unsigned long delay = delays_[nextDelayIndex % delays_.size()]; + if (delay > 0) { + aout.stream() << "Delaying response to " << event->GetRequest().request_token() + << " for " << delay << "s" << std::endl<< std::endl; +@@ -99,16 +137,19 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + } + + // Send the response back to Google Chrome. +- auto rc = event->Send(); +- if (rc != content_analysis::sdk::ResultCode::OK) { +- aout.stream() << "[Demo] Error sending response: " +- << content_analysis::sdk::ResultCodeToString(rc) +- << std::endl; +- aout.stream() << event->DebugString() << std::endl; ++ bool sentCustomResponse = SendCustomResponse(event); ++ if (!sentCustomResponse) { ++ auto rc = event->Send(); ++ if (rc != content_analysis::sdk::ResultCode::OK) { ++ aout.stream() << "[Demo] Error sending response: " ++ << content_analysis::sdk::ResultCodeToString(rc) ++ << std::endl; ++ aout.stream() << event->DebugString() << std::endl; ++ } + } + } + +- private: ++ protected: + void OnBrowserConnected( + const content_analysis::sdk::BrowserInfo& info) override { + AtomicCout aout; +@@ -362,21 +403,40 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + return true; + } + +- bool ShouldBlockRequest(const std::string& content) { +- // Determines if the request should be blocked. For this simple example +- // the content is blocked if the string "block" is found. Otherwise the +- // content is allowed. +- return content.find("block") != std::string::npos; +- } +- +- void GetFileSpecificDelay(const std::string& content, unsigned long* delay) { +- auto pos = content.find("delay="); +- if (pos != std::string::npos) { +- std::sscanf(content.substr(pos).c_str(), "delay=%lu", delay); ++ std::optional ++ DecideCAResponse(const std::string& content, std::stringstream& stream) { ++ for (auto& r : toBlock_) { ++ if (std::regex_search(content, r.second)) { ++ stream << "'" << content << "' matches BLOCK regex '" ++ << r.first << "'" << std::endl; ++ return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; ++ } + } ++ for (auto& r : toWarn_) { ++ if (std::regex_search(content, r.second)) { ++ stream << "'" << content << "' matches WARN regex '" ++ << r.first << "'" << std::endl; ++ return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN; ++ } ++ } ++ for (auto& r : toReport_) { ++ if (std::regex_search(content, r.second)) { ++ stream << "'" << content << "' matches REPORT_ONLY regex '" ++ << r.first << "'" << std::endl; ++ return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY; ++ } ++ } ++ stream << "'" << content << "' was ALLOWed\n"; ++ return {}; + } + +- unsigned long delay_; ++ // For the demo, block any content that matches these wildcards. ++ RegexArray toBlock_; ++ RegexArray toWarn_; ++ RegexArray toReport_; ++ ++ std::vector delays_; ++ std::atomic nextDelayIndex_; + std::string print_data_file_path_; + }; + +@@ -384,8 +444,11 @@ class Handler : public content_analysis::sdk::AgentEventHandler { + // any requests that have the keyword "block" in their data + class QueuingHandler : public Handler { + public: +- QueuingHandler(unsigned long threads, unsigned long delay, const std::string& print_data_file_path) +- : Handler(delay, print_data_file_path) { ++ QueuingHandler(unsigned long threads, std::vector&& delays, const std::string& print_data_file_path, ++ RegexArray&& toBlock = RegexArray(), ++ RegexArray&& toWarn = RegexArray(), ++ RegexArray&& toReport = RegexArray()) ++ : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) { + StartBackgroundThreads(threads); + } + +@@ -421,6 +484,8 @@ class QueuingHandler : public Handler { + aout.stream() << std::endl << "----------" << std::endl; + aout.stream() << "Thread: " << std::this_thread::get_id() + << std::endl; ++ aout.stream() << "Delaying request processing for " ++ << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "s" << std::endl << std::endl; + aout.flush(); + + handler->AnalyzeContent(aout, std::move(event)); +-- +2.42.0.windows.2 + -- cgit v1.2.3