diff options
Diffstat (limited to 'third_party/content_analysis_sdk/demo')
-rw-r--r-- | third_party/content_analysis_sdk/demo/README.md | 16 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/agent.cc | 189 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/atomic_output.h | 29 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/client.cc | 411 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/handler.h | 449 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/handler_misbehaving.h | 495 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/modes.h | 25 | ||||
-rw-r--r-- | third_party/content_analysis_sdk/demo/request_queue.h | 70 |
8 files changed, 1684 insertions, 0 deletions
diff --git a/third_party/content_analysis_sdk/demo/README.md b/third_party/content_analysis_sdk/demo/README.md new file mode 100644 index 0000000000..0f22912cc1 --- /dev/null +++ b/third_party/content_analysis_sdk/demo/README.md @@ -0,0 +1,16 @@ +# Google Chrome Content Analysis Connector Agent SDK Demo + +This directory holds the Google Chrome Content Analysis Connector Agent SDK Demo. +It contains an example of how to use the SDK. + +Build instructions are available in the main project `README.md`. + +## Demo agent permissions +On Microsoft Windows, if the demo agent is run without the `--user` command line +argument it must have Administrator privileges in order to properly create the +pipe used to communicate with the browser. The demo browser must also be run +without the `--user` command line argument. + +Otherwise the agent may run as any user, with or without Administrator +privileges. The demo browser must also be run with the `--user` command line +argument and run as the same user.
\ No newline at end of file diff --git a/third_party/content_analysis_sdk/demo/agent.cc b/third_party/content_analysis_sdk/demo/agent.cc new file mode 100644 index 0000000000..c3640018e6 --- /dev/null +++ b/third_party/content_analysis_sdk/demo/agent.cc @@ -0,0 +1,189 @@ +// Copyright 2022 The Chromium Authors. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <string> +#include <regex> +#include <vector> + +#include "content_analysis/sdk/analysis_agent.h" +#include "demo/handler.h" +#include "demo/handler_misbehaving.h" + +using namespace content_analysis::sdk; + +// Different paths are used depending on whether this agent should run as a +// use specific agent or not. These values are chosen to match the test +// values in chrome browser. +constexpr char kPathUser[] = "path_user"; +constexpr char kPathSystem[] = "brcm_chrm_cas"; + +// Global app config. +std::string path = kPathSystem; +bool use_queue = false; +bool user_specific = false; +std::vector<unsigned long> delays = {0}; // In seconds. +unsigned long num_threads = 8u; +std::string save_print_data_path = ""; +RegexArray toBlock, toWarn, toReport; +static bool useMisbehavingHandler = false; +static std::string modeStr; + +// Command line parameters. +constexpr const char* kArgDelaySpecific = "--delays="; +constexpr const char* kArgPath = "--path="; +constexpr const char* kArgQueued = "--queued"; +constexpr const char* kArgThreads = "--threads="; +constexpr const char* kArgUserSpecific = "--user"; +constexpr const char* kArgToBlock = "--toblock="; +constexpr const char* kArgToWarn = "--towarn="; +constexpr const char* kArgToReport = "--toreport="; +constexpr const char* kArgMisbehave = "--misbehave="; +constexpr const char* kArgHelp = "--help"; +constexpr const char* kArgSavePrintRequestDataTo = "--save-print-request-data-to="; + +std::map<std::string, Mode> sStringToMode = { +#define AGENT_MODE(name) {#name, Mode::Mode_##name}, +#include "modes.h" +#undef AGENT_MODE +}; + +std::map<Mode, std::string> sModeToString = { +#define AGENT_MODE(name) {Mode::Mode_##name, #name}, +#include "modes.h" +#undef AGENT_MODE +}; + +std::vector<std::pair<std::string, std::regex>> +ParseRegex(const std::string str) { + std::vector<std::pair<std::string, std::regex>> ret; + for (auto it = str.begin(); it != str.end(); /* nop */) { + auto it2 = std::find(it, str.end(), ','); + ret.push_back(std::make_pair(std::string(it, it2), std::regex(it, it2))); + it = it2 == str.end() ? it2 : it2 + 1; + } + + return ret; +} + +bool ParseCommandLine(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + const std::string arg = argv[i]; + if (arg.find(kArgUserSpecific) == 0) { + // If kArgPath was already used, abort. + if (path != kPathSystem) { + std::cout << std::endl << "ERROR: use --path=<path> after --user"; + return false; + } + path = kPathUser; + user_specific = true; + } else if (arg.find(kArgDelaySpecific) == 0) { + std::string delaysStr = arg.substr(strlen(kArgDelaySpecific)); + delays.clear(); + size_t posStart = 0, posEnd; + unsigned long delay; + while ((posEnd = delaysStr.find(',', posStart)) != std::string::npos) { + delay = std::stoul(delaysStr.substr(posStart, posEnd - posStart)); + if (delay > 30) { + delay = 30; + } + delays.push_back(delay); + posStart = posEnd + 1; + } + delay = std::stoul(delaysStr.substr(posStart)); + if (delay > 30) { + delay = 30; + } + delays.push_back(delay); + } else if (arg.find(kArgPath) == 0) { + path = arg.substr(strlen(kArgPath)); + } else if (arg.find(kArgQueued) == 0) { + use_queue = true; + } else if (arg.find(kArgThreads) == 0) { + num_threads = std::stoul(arg.substr(strlen(kArgThreads))); + } else if (arg.find(kArgToBlock) == 0) { + toBlock = ParseRegex(arg.substr(strlen(kArgToBlock))); + } else if (arg.find(kArgToWarn) == 0) { + toWarn = ParseRegex(arg.substr(strlen(kArgToWarn))); + } else if (arg.find(kArgToReport) == 0) { + toReport = ParseRegex(arg.substr(strlen(kArgToReport))); + } else if (arg.find(kArgMisbehave) == 0) { + modeStr = arg.substr(strlen(kArgMisbehave)); + useMisbehavingHandler = true; + } else if (arg.find(kArgHelp) == 0) { + return false; + } else if (arg.find(kArgSavePrintRequestDataTo) == 0) { + int arg_len = strlen(kArgSavePrintRequestDataTo); + save_print_data_path = arg.substr(arg_len); + } + } + + return true; +} + +void PrintHelp() { + std::cout + << std::endl << std::endl + << "Usage: agent [OPTIONS]" << std::endl + << "A simple agent to process content analysis requests." << std::endl + << "Data containing the string 'block' blocks the request data from being used." << std::endl + << std::endl << "Options:" << std::endl + << kArgDelaySpecific << "<delay1,delay2,...> : Add delays to request processing in seconds. Delays are limited to 30 seconds and are applied round-robin to requests. Default is 0." << std::endl + << kArgPath << " <path> : Used the specified path instead of default. Must come after --user." << std::endl + << kArgQueued << " : Queue requests for processing in a background thread" << std::endl + << kArgThreads << " : When queued, number of threads in the request processing thread pool" << std::endl + << kArgUserSpecific << " : Make agent OS user specific." << std::endl + << kArgSavePrintRequestDataTo << " : saves the PDF data to the given file path for print requests" << std::endl + << kArgToBlock << "<regex> : Regular expression matching file and text content to block." << std::endl + << kArgToWarn << "<regex> : Regular expression matching file and text content to warn about." << std::endl + << kArgToReport << "<regex> : Regular expression matching file and text content to report." << std::endl + << kArgMisbehave << "<mode> : Use 'misbehaving' agent in given mode for testing purposes." << std::endl + << kArgHelp << " : prints this help message" << std::endl; +} + +int main(int argc, char* argv[]) { + if (!ParseCommandLine(argc, argv)) { + PrintHelp(); + return 1; + } + + // TODO: Add toBlock, toWarn, toReport to QueueingHandler + auto handler = + useMisbehavingHandler + ? MisbehavingHandler::Create(delays[0], modeStr) + : use_queue + ? std::make_unique<QueuingHandler>(num_threads, std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) + : std::make_unique<Handler>(std::move(delays), save_print_data_path, std::move(toBlock), std::move(toWarn), std::move(toReport)); + + if (!handler) { + std::cout << "[Demo] Failed to construct handler." << std::endl; + return 1; + } + + // Each agent uses a unique name to identify itself with Google Chrome. + content_analysis::sdk::ResultCode rc; + auto agent = content_analysis::sdk::Agent::Create( + {path, user_specific}, std::move(handler), &rc); + if (!agent || rc != content_analysis::sdk::ResultCode::OK) { + std::cout << "[Demo] Error starting agent: " + << content_analysis::sdk::ResultCodeToString(rc) + << std::endl; + return 1; + }; + + std::cout << "[Demo] " << agent->DebugString() << std::endl; + + // Blocks, sending events to the handler until agent->Stop() is called. + rc = agent->HandleEvents(); + if (rc != content_analysis::sdk::ResultCode::OK) { + std::cout << "[Demo] Error from handling events: " + << content_analysis::sdk::ResultCodeToString(rc) + << std::endl; + std::cout << "[Demo] " << agent->DebugString() << std::endl; + } + + return 0; +} diff --git a/third_party/content_analysis_sdk/demo/atomic_output.h b/third_party/content_analysis_sdk/demo/atomic_output.h new file mode 100644 index 0000000000..86ca8cdd75 --- /dev/null +++ b/third_party/content_analysis_sdk/demo/atomic_output.h @@ -0,0 +1,29 @@ +// Copyright 2022 The Chromium Authors. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <iostream> +#include <sstream> +#include <string> + +// Utility class to atomically write outout to std::cout. All data streamed +// the class is automatically sent to std::cout in the dtor. This is useful +// to keep the output of multiple threads writing to std::Cout from +// interleaving. + +class AtomicCout { + public: + ~AtomicCout() { + flush(); + } + + std::stringstream& stream() { return stream_; } + + void flush() { + std::cout << stream_.str(); + stream_.str(std::string()); + } + + private: + std::stringstream stream_; +};
\ No newline at end of file diff --git a/third_party/content_analysis_sdk/demo/client.cc b/third_party/content_analysis_sdk/demo/client.cc new file mode 100644 index 0000000000..5e47fca57f --- /dev/null +++ b/third_party/content_analysis_sdk/demo/client.cc @@ -0,0 +1,411 @@ +// Copyright 2022 The Chromium Authors. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <time.h> + +#include <iostream> +#include <memory> +#include <mutex> +#include <sstream> +#include <string> +#include <thread> +#include <vector> + +#include "content_analysis/sdk/analysis_client.h" +#include "demo/atomic_output.h" + +using content_analysis::sdk::Client; +using content_analysis::sdk::ContentAnalysisRequest; +using content_analysis::sdk::ContentAnalysisResponse; +using content_analysis::sdk::ContentAnalysisAcknowledgement; + +// Different paths are used depending on whether this agent should run as a +// use specific agent or not. These values are chosen to match the test +// values in chrome browser. +constexpr char kPathUser[] = "path_user"; +constexpr char kPathSystem[] = "brcm_chrm_cas"; + +// Global app config. +std::string path = kPathSystem; +bool user_specific = false; +bool group = false; +std::unique_ptr<Client> client; + +// Paramters used to build the request. +content_analysis::sdk::AnalysisConnector connector = + content_analysis::sdk::FILE_ATTACHED; +time_t request_token_number = time(nullptr); +std::string request_token; +std::string tag = "dlp"; +bool threaded = false; +std::string digest = "sha256-123456"; +std::string url = "https://upload.example.com"; +std::string email = "me@example.com"; +std::string machine_user = "DOMAIN\\me"; +std::vector<std::string> datas; + +// When grouping, remember the tokens of all requests/responses in order to +// acknowledge them all with the same final action. +// +// This global state. It may be access from multiple thread so must be +// accessed from a critical section. +std::mutex global_mutex; +ContentAnalysisAcknowledgement::FinalAction global_final_action = + ContentAnalysisAcknowledgement::ALLOW; +std::vector<std::string> request_tokens; + +// Command line parameters. +constexpr const char* kArgConnector = "--connector="; +constexpr const char* kArgDigest = "--digest="; +constexpr const char* kArgEmail = "--email="; +constexpr const char* kArgGroup = "--group"; +constexpr const char* kArgMachineUser = "--machine-user="; +constexpr const char* kArgPath = "--path="; +constexpr const char* kArgRequestToken = "--request-token="; +constexpr const char* kArgTag = "--tag="; +constexpr const char* kArgThreaded = "--threaded"; +constexpr const char* kArgUrl = "--url="; +constexpr const char* kArgUserSpecific = "--user"; +constexpr const char* kArgHelp = "--help"; + +bool ParseCommandLine(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + const std::string arg = argv[i]; + if (arg.find(kArgConnector) == 0) { + std::string connector_str = arg.substr(strlen(kArgConnector)); + if (connector_str == "download") { + connector = content_analysis::sdk::FILE_DOWNLOADED; + } else if (connector_str == "attach") { + connector = content_analysis::sdk::FILE_ATTACHED; + } else if (connector_str == "bulk-data-entry") { + connector = content_analysis::sdk::BULK_DATA_ENTRY; + } else if (connector_str == "print") { + connector = content_analysis::sdk::PRINT; + } else if (connector_str == "file-transfer") { + connector = content_analysis::sdk::FILE_TRANSFER; + } else { + std::cout << "[Demo] Incorrect command line arg: " << arg << std::endl; + return false; + } + } else if (arg.find(kArgRequestToken) == 0) { + request_token = arg.substr(strlen(kArgRequestToken)); + } else if (arg.find(kArgTag) == 0) { + tag = arg.substr(strlen(kArgTag)); + } else if (arg.find(kArgThreaded) == 0) { + threaded = true; + } else if (arg.find(kArgDigest) == 0) { + digest = arg.substr(strlen(kArgDigest)); + } else if (arg.find(kArgUrl) == 0) { + url = arg.substr(strlen(kArgUrl)); + } else if (arg.find(kArgMachineUser) == 0) { + machine_user = arg.substr(strlen(kArgMachineUser)); + } else if (arg.find(kArgEmail) == 0) { + email = arg.substr(strlen(kArgEmail)); + } else if (arg.find(kArgPath) == 0) { + path = arg.substr(strlen(kArgPath)); + } else if (arg.find(kArgUserSpecific) == 0) { + // If kArgPath was already used, abort. + if (path != kPathSystem) { + std::cout << std::endl << "ERROR: use --path=<path> after --user"; + return false; + } + path = kPathUser; + user_specific = true; + } else if (arg.find(kArgGroup) == 0) { + group = true; + } else if (arg.find(kArgHelp) == 0) { + return false; + } else { + datas.push_back(arg); + } + } + + return true; +} + +void PrintHelp() { + std::cout + << std::endl << std::endl + << "Usage: client [OPTIONS] [@]content_or_file ..." << std::endl + << "A simple client to send content analysis requests to a running agent." << std::endl + << "Without @ the content to analyze is the argument itself." << std::endl + << "Otherwise the content is read from a file called 'content_or_file'." << std::endl + << "Multiple [@]content_or_file arguments may be specified, each generates one request." << std::endl + << std::endl << "Options:" << std::endl + << kArgConnector << "<connector> : one of 'download', 'attach' (default), 'bulk-data-entry', 'print', or 'file-transfer'" << std::endl + << kArgRequestToken << "<unique-token> : defaults to 'req-<number>' which auto increments" << std::endl + << kArgTag << "<tag> : defaults to 'dlp'" << std::endl + << kArgThreaded << " : handled multiple requests using threads" << std::endl + << kArgUrl << "<url> : defaults to 'https://upload.example.com'" << std::endl + << kArgMachineUser << "<machine-user> : defaults to 'DOMAIN\\me'" << std::endl + << kArgEmail << "<email> : defaults to 'me@example.com'" << std::endl + << kArgPath << " <path> : Used the specified path instead of default. Must come after --user." << std::endl + << kArgUserSpecific << " : Connects to an OS user specific agent" << std::endl + << kArgDigest << "<digest> : defaults to 'sha256-123456'" << std::endl + << kArgGroup << " : Generate the same final action for all requests" << std::endl + << kArgHelp << " : prints this help message" << std::endl; +} + +std::string GenerateRequestToken() { + std::stringstream stm; + stm << "req-" << request_token_number++; + return stm.str(); +} + +ContentAnalysisRequest BuildRequest(const std::string& data) { + std::string filepath; + std::string filename; + if (data[0] == '@') { + filepath = data.substr(1); + filename = filepath.substr(filepath.find_last_of("/\\") + 1); + } + + ContentAnalysisRequest request; + + // Set request to expire 5 minutes into the future. + request.set_expires_at(time(nullptr) + 5 * 60); + request.set_analysis_connector(connector); + request.set_request_token(!request_token.empty() + ? request_token : GenerateRequestToken()); + *request.add_tags() = tag; + + auto request_data = request.mutable_request_data(); + request_data->set_url(url); + request_data->set_email(email); + request_data->set_digest(digest); + if (!filename.empty()) { + request_data->set_filename(filename); + } + + auto client_metadata = request.mutable_client_metadata(); + auto browser = client_metadata->mutable_browser(); + browser->set_machine_user(machine_user); + + if (!filepath.empty()) { + request.set_file_path(filepath); + } else if (!data.empty()) { + request.set_text_content(data); + } else { + std::cout << "[Demo] Specify text content or a file path." << std::endl; + PrintHelp(); + exit(1); + } + + return request; +} + +// Gets the most severe action within the result. +ContentAnalysisResponse::Result::TriggeredRule::Action +GetActionFromResult(const ContentAnalysisResponse::Result& result) { + auto action = + ContentAnalysisResponse::Result::TriggeredRule::ACTION_UNSPECIFIED; + for (auto rule : result.triggered_rules()) { + if (rule.has_action() && rule.action() > action) + action = rule.action(); + } + return action; +} + +// Gets the most severe action within all the the results of a response. +ContentAnalysisResponse::Result::TriggeredRule::Action +GetActionFromResponse(const ContentAnalysisResponse& response) { + auto action = + ContentAnalysisResponse::Result::TriggeredRule::ACTION_UNSPECIFIED; + for (auto result : response.results()) { + auto action2 = GetActionFromResult(result); + if (action2 > action) + action = action2; + } + return action; +} + +void DumpResponse( + std::stringstream& stream, + const ContentAnalysisResponse& response) { + for (auto result : response.results()) { + auto tag = result.has_tag() ? result.tag() : "<no-tag>"; + + auto status = result.has_status() + ? result.status() + : ContentAnalysisResponse::Result::STATUS_UNKNOWN; + std::string status_str; + switch (status) { + case ContentAnalysisResponse::Result::STATUS_UNKNOWN: + status_str = "Unknown"; + break; + case ContentAnalysisResponse::Result::SUCCESS: + status_str = "Success"; + break; + case ContentAnalysisResponse::Result::FAILURE: + status_str = "Failure"; + break; + default: + status_str = "<Uknown>"; + break; + } + + auto action = GetActionFromResult(result); + std::string action_str; + switch (action) { + case ContentAnalysisResponse::Result::TriggeredRule::ACTION_UNSPECIFIED: + action_str = "allowed"; + break; + case ContentAnalysisResponse::Result::TriggeredRule::REPORT_ONLY: + action_str = "reported only"; + break; + case ContentAnalysisResponse::Result::TriggeredRule::WARN: + action_str = "warned"; + break; + case ContentAnalysisResponse::Result::TriggeredRule::BLOCK: + action_str = "blocked"; + break; + } + + time_t now = time(nullptr); + stream << "[Demo] Request " << response.request_token() << " is " << action_str + << " after " << tag + << " analysis, status=" << status_str + << " at " << ctime(&now); + } +} + +ContentAnalysisAcknowledgement BuildAcknowledgement( + const std::string& request_token, + ContentAnalysisAcknowledgement::FinalAction final_action) { + ContentAnalysisAcknowledgement ack; + ack.set_request_token(request_token); + ack.set_status(ContentAnalysisAcknowledgement::SUCCESS); + ack.set_final_action(final_action); + return ack; +} + +void HandleRequest(const ContentAnalysisRequest& request) { + AtomicCout aout; + ContentAnalysisResponse response; + int err = client->Send(request, &response); + if (err != 0) { + aout.stream() << "[Demo] Error sending request " << request.request_token() + << std::endl; + } else if (response.results_size() == 0) { + aout.stream() << "[Demo] Response " << request.request_token() << " is missing a result" + << std::endl; + } else { + DumpResponse(aout.stream(), response); + + auto final_action = ContentAnalysisAcknowledgement::ALLOW; + switch (GetActionFromResponse(response)) { + case ContentAnalysisResponse::Result::TriggeredRule::ACTION_UNSPECIFIED: + break; + case ContentAnalysisResponse::Result::TriggeredRule::REPORT_ONLY: + final_action = ContentAnalysisAcknowledgement::REPORT_ONLY; + break; + case ContentAnalysisResponse::Result::TriggeredRule::WARN: + final_action = ContentAnalysisAcknowledgement::WARN; + break; + case ContentAnalysisResponse::Result::TriggeredRule::BLOCK: + final_action = ContentAnalysisAcknowledgement::BLOCK; + break; + } + + // If grouping, remember the request's token in order to ack the response + // later. + if (group) { + std::unique_lock<std::mutex> lock(global_mutex); + request_tokens.push_back(request.request_token()); + if (final_action > global_final_action) + global_final_action = final_action; + } else { + int err = client->Acknowledge( + BuildAcknowledgement(request.request_token(), final_action)); + if (err != 0) { + aout.stream() << "[Demo] Error sending ack " << request.request_token() + << std::endl; + } + } + } +} + +void ProcessRequest(size_t i) { + auto request = BuildRequest(datas[i]); + + { + AtomicCout aout; + aout.stream() << "[Demo] Sending request " << request.request_token() << std::endl; + } + + HandleRequest(request); +} + +int main(int argc, char* argv[]) { + if (!ParseCommandLine(argc, argv)) { + PrintHelp(); + return 1; + } + + // Each client uses a unique name to identify itself with Google Chrome. + client = Client::Create({path, user_specific}); + if (!client) { + std::cout << "[Demo] Error starting client" << std::endl; + return 1; + }; + + auto info = client->GetAgentInfo(); + std::cout << "Agent pid=" << info.pid + << " path=" << info.binary_path << std::endl; + + if (threaded) { + std::vector<std::unique_ptr<std::thread>> threads; + for (int i = 0; i < datas.size(); ++i) { + AtomicCout aout; + aout.stream() << "Start thread " << i << std::endl; + threads.emplace_back(std::make_unique<std::thread>(ProcessRequest, i)); + } + + // Make sure all threads have terminated. + for (auto& thread : threads) { + thread->join(); + } + } + else { + for (size_t i = 0; i < datas.size(); ++i) { + ProcessRequest(i); + } + } + // It's safe to access global state beyond this point without locking since + // all no more responses will be touching them. + + if (group) { + std::cout << std::endl; + std::cout << "[Demo] Final action for all requests is "; + switch (global_final_action) { + // Google Chrome fails open, so if no action is specified that is the same + // as ALLOW. + case ContentAnalysisAcknowledgement::ACTION_UNSPECIFIED: + case ContentAnalysisAcknowledgement::ALLOW: + std::cout << "allowed"; + break; + case ContentAnalysisAcknowledgement::REPORT_ONLY: + std::cout << "reported only"; + break; + case ContentAnalysisAcknowledgement::WARN: + std::cout << "warned"; + break; + case ContentAnalysisAcknowledgement::BLOCK: + std::cout << "blocked"; + break; + } + std::cout << std::endl << std::endl; + + for (auto token : request_tokens) { + std::cout << "[Demo] Sending group Ack" << std::endl; + int err = client->Acknowledge( + BuildAcknowledgement(token, global_final_action)); + if (err != 0) { + std::cout << "[Demo] Error sending ack for " << token << std::endl; + } + } + } + + return 0; +}; diff --git a/third_party/content_analysis_sdk/demo/handler.h b/third_party/content_analysis_sdk/demo/handler.h new file mode 100644 index 0000000000..1c9871bd08 --- /dev/null +++ b/third_party/content_analysis_sdk/demo/handler.h @@ -0,0 +1,449 @@ +// Copyright 2022 The Chromium Authors. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CONTENT_ANALYSIS_DEMO_HANDLER_H_ +#define CONTENT_ANALYSIS_DEMO_HANDLER_H_ + +#include <time.h> + +#include <algorithm> +#include <atomic> +#include <chrono> +#include <fstream> +#include <iostream> +#include <optional> +#include <thread> +#include <utility> +#include <regex> +#include <vector> + +#include "content_analysis/sdk/analysis_agent.h" +#include "demo/atomic_output.h" +#include "demo/request_queue.h" + +using RegexArray = std::vector<std::pair<std::string, std::regex>>; + +// An AgentEventHandler that dumps requests information to stdout and blocks +// any requests that have the keyword "block" in their data +class Handler : public content_analysis::sdk::AgentEventHandler { + public: + using Event = content_analysis::sdk::ContentAnalysisEvent; + + Handler(std::vector<unsigned long>&& delays, const std::string& print_data_file_path, + RegexArray&& toBlock = RegexArray(), + RegexArray&& toWarn = RegexArray(), + RegexArray&& toReport = RegexArray()) : + toBlock_(std::move(toBlock)), toWarn_(std::move(toWarn)), toReport_(std::move(toReport)), + delays_(std::move(delays)), print_data_file_path_(print_data_file_path) {} + + const std::vector<unsigned long> delays() { return delays_; } + size_t nextDelayIndex() const { return nextDelayIndex_; } + + protected: + // Analyzes one request from Google Chrome and responds back to the browser + // with either an allow or block verdict. + void AnalyzeContent(std::stringstream& stream, std::unique_ptr<Event> event) { + // An event represents one content analysis request and response triggered + // by a user action in Google Chrome. The agent determines whether the + // user is allowed to perform the action by examining event->GetRequest(). + // The verdict, which can be "allow" or "block" is written into + // event->GetResponse(). + + DumpEvent(stream, event.get()); + + bool success = true; + std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> caResponse = + content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; + + if (event->GetRequest().has_text_content()) { + caResponse = DecideCAResponse( + event->GetRequest().text_content(), stream); + } else if (event->GetRequest().has_file_path()) { + // TODO: Fix downloads to store file *first* so we can check contents. + // Until then, just check the file name: + caResponse = DecideCAResponse( + event->GetRequest().file_path(), stream); + } else if (event->GetRequest().has_print_data()) { + // In the case of print request, normally the PDF bytes would be parsed + // for sensitive data violations. To keep this class simple, only the + // URL is checked for the word "block". + caResponse = DecideCAResponse(event->GetRequest().request_data().url(), stream); + } + + if (!success) { + content_analysis::sdk::UpdateResponse( + event->GetResponse(), + std::string(), + content_analysis::sdk::ContentAnalysisResponse::Result::FAILURE); + stream << " Verdict: failed to reach verdict: "; + stream << event->DebugString() << std::endl; + } else { + stream << " Verdict: "; + if (caResponse) { + switch (caResponse.value()) { + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK: + stream << "BLOCK"; + break; + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN: + stream << "WARN"; + break; + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY: + stream << "REPORT_ONLY"; + break; + case content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_ACTION_UNSPECIFIED: + stream << "ACTION_UNSPECIFIED"; + break; + default: + stream << "<error>"; + break; + } + auto rc = + content_analysis::sdk::SetEventVerdictTo(event.get(), caResponse.value()); + if (rc != content_analysis::sdk::ResultCode::OK) { + stream << " error: " + << content_analysis::sdk::ResultCodeToString(rc) << std::endl; + stream << " " << event->DebugString() << std::endl; + } + stream << std::endl; + } else { + stream << " Verdict: allow" << std::endl; + } + stream << std::endl; + } + stream << std::endl; + + // If a delay is specified, wait that much. + size_t nextDelayIndex = nextDelayIndex_.fetch_add(1); + unsigned long delay = delays_[nextDelayIndex % delays_.size()]; + if (delay > 0) { + std::this_thread::sleep_for(std::chrono::seconds(delay)); + } + + // Send the response back to Google Chrome. + auto rc = event->Send(); + if (rc != content_analysis::sdk::ResultCode::OK) { + stream << "[Demo] Error sending response: " + << content_analysis::sdk::ResultCodeToString(rc) + << std::endl; + stream << event->DebugString() << std::endl; + } + } + + private: + void OnBrowserConnected( + const content_analysis::sdk::BrowserInfo& info) override { + AtomicCout aout; + aout.stream() << std::endl << "==========" << std::endl; + aout.stream() << "Browser connected pid=" << info.pid + << " path=" << info.binary_path << std::endl; + } + + void OnBrowserDisconnected( + const content_analysis::sdk::BrowserInfo& info) override { + AtomicCout aout; + aout.stream() << std::endl << "Browser disconnected pid=" << info.pid << std::endl; + aout.stream() << "==========" << std::endl; + } + + void OnAnalysisRequested(std::unique_ptr<Event> event) override { + // If the agent is capable of analyzing content in the background, the + // events may be handled in background threads. Having said that, a + // event should not be assumed to be thread safe, that is, it should not + // be accessed by more than one thread concurrently. + // + // In this example code, the event is handled synchronously. + AtomicCout aout; + aout.stream() << std::endl << "----------" << std::endl << std::endl; + AnalyzeContent(aout.stream(), std::move(event)); + } + + void OnResponseAcknowledged( + const content_analysis::sdk::ContentAnalysisAcknowledgement& + ack) override { + const char* final_action = "<Unknown>"; + if (ack.has_final_action()) { + switch (ack.final_action()) { + case content_analysis::sdk::ContentAnalysisAcknowledgement::ACTION_UNSPECIFIED: + final_action = "<Unspecified>"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::ALLOW: + final_action = "Allow"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::REPORT_ONLY: + final_action = "Report only"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::WARN: + final_action = "Warn"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::BLOCK: + final_action = "Block"; + break; + } + } + + AtomicCout aout; + aout.stream() << "Ack: " << ack.request_token() << std::endl; + aout.stream() << " Final action: " << final_action << std::endl; + } + void OnCancelRequests( + const content_analysis::sdk::ContentAnalysisCancelRequests& cancel) + override { + AtomicCout aout; + aout.stream() << "Cancel: " << std::endl; + aout.stream() << " User action ID: " << cancel.user_action_id() << std::endl; + } + + void OnInternalError( + const char* context, + content_analysis::sdk::ResultCode error) override { + AtomicCout aout; + aout.stream() << std::endl + << "*ERROR*: context=\"" << context << "\" " + << content_analysis::sdk::ResultCodeToString(error) + << std::endl; + } + + void DumpEvent(std::stringstream& stream, Event* event) { + time_t now = time(nullptr); + stream << "Received at: " << ctime(&now); // Returned string includes \n. + + const content_analysis::sdk::ContentAnalysisRequest& request = + event->GetRequest(); + std::string connector = "<Unknown>"; + if (request.has_analysis_connector()) { + switch (request.analysis_connector()) + { + case content_analysis::sdk::FILE_DOWNLOADED: + connector = "download"; + break; + case content_analysis::sdk::FILE_ATTACHED: + connector = "attach"; + break; + case content_analysis::sdk::BULK_DATA_ENTRY: + connector = "bulk-data-entry"; + break; + case content_analysis::sdk::PRINT: + connector = "print"; + break; + case content_analysis::sdk::FILE_TRANSFER: + connector = "file-transfer"; + break; + default: + break; + } + } + + std::string url = + request.has_request_data() && request.request_data().has_url() + ? request.request_data().url() : "<No URL>"; + + std::string tab_title = + request.has_request_data() && request.request_data().has_tab_title() + ? request.request_data().tab_title() : "<No tab title>"; + + std::string filename = + request.has_request_data() && request.request_data().has_filename() + ? request.request_data().filename() : "<No filename>"; + + std::string digest = + request.has_request_data() && request.request_data().has_digest() + ? request.request_data().digest() : "<No digest>"; + + std::string file_path = + request.has_file_path() + ? request.file_path() : "<none>"; + + std::string text_content = + request.has_text_content() + ? request.text_content() : "<none>"; + + std::string machine_user = + request.has_client_metadata() && + request.client_metadata().has_browser() && + request.client_metadata().browser().has_machine_user() + ? request.client_metadata().browser().machine_user() : "<No machine user>"; + + std::string email = + request.has_request_data() && request.request_data().has_email() + ? request.request_data().email() : "<No email>"; + + time_t t = request.expires_at(); + std::string expires_at_str = ctime(&t); + // Returned string includes trailing \n, overwrite with null. + expires_at_str[expires_at_str.size() - 1] = 0; + time_t secs_remaining = t - now; + + std::string user_action_id = request.has_user_action_id() + ? request.user_action_id() : "<No user action id>"; + + stream << "Request: " << request.request_token() << std::endl; + stream << " User action ID: " << user_action_id << std::endl; + stream << " Expires at: " << expires_at_str << " (" + << secs_remaining << " seconds from now)" << std::endl; + stream << " Connector: " << connector << std::endl; + stream << " URL: " << url << std::endl; + stream << " Tab title: " << tab_title << std::endl; + stream << " Filename: " << filename << std::endl; + stream << " Digest: " << digest << std::endl; + stream << " Filepath: " << file_path << std::endl; + stream << " Text content: '" << text_content << "'" << std::endl; + stream << " Machine user: " << machine_user << std::endl; + stream << " Email: " << email << std::endl; + if (request.has_print_data() && !print_data_file_path_.empty()) { + if (request.request_data().has_print_metadata() && + request.request_data().print_metadata().has_printer_name()) { + stream << " Printer name: " + << request.request_data().print_metadata().printer_name() + << std::endl; + } else { + stream << " No printer name in request" << std::endl; + } + + stream << " Print data saved to: " << print_data_file_path_ + << std::endl; + using content_analysis::sdk::ContentAnalysisEvent; + auto print_data = + content_analysis::sdk::CreateScopedPrintHandle(event->GetRequest(), + event->GetBrowserInfo().pid); + std::ofstream file(print_data_file_path_, + std::ios::out | std::ios::trunc | std::ios::binary); + file.write(print_data->data(), print_data->size()); + file.flush(); + file.close(); + } + } + + bool ReadContentFromFile(const std::string& file_path, + std::string* content) { + std::ifstream file(file_path, + std::ios::in | std::ios::binary | std::ios::ate); + if (!file.is_open()) + return false; + + // Get file size. This example does not handle files larger than 1MB. + // Make sure content string can hold the contents of the file. + int size = file.tellg(); + if (size > 1024 * 1024) + return false; + + content->resize(size + 1); + + // Read file into string. + file.seekg(0, std::ios::beg); + file.read(&(*content)[0], size); + content->at(size) = 0; + return true; + } + + std::optional<content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action> + DecideCAResponse(const std::string& content, std::stringstream& stream) { + for (auto& r : toBlock_) { + if (std::regex_search(content, r.second)) { + stream << "'" << content << "' matches BLOCK regex '" + << r.first << "'" << std::endl; + return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_BLOCK; + } + } + for (auto& r : toWarn_) { + if (std::regex_search(content, r.second)) { + stream << "'" << content << "' matches WARN regex '" + << r.first << "'" << std::endl; + return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_WARN; + } + } + for (auto& r : toReport_) { + if (std::regex_search(content, r.second)) { + stream << "'" << content << "' matches REPORT_ONLY regex '" + << r.first << "'" << std::endl; + return content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule_Action_REPORT_ONLY; + } + } + stream << "'" << content << "' was ALLOWed\n"; + return {}; + } + + // For the demo, block any content that matches these wildcards. + RegexArray toBlock_; + RegexArray toWarn_; + RegexArray toReport_; + + std::vector<unsigned long> delays_; + std::atomic<size_t> nextDelayIndex_; + std::string print_data_file_path_; +}; + +// An AgentEventHandler that dumps requests information to stdout and blocks +// any requests that have the keyword "block" in their data +class QueuingHandler : public Handler { + public: + QueuingHandler(unsigned long threads, std::vector<unsigned long>&& delays, const std::string& print_data_file_path, + RegexArray&& toBlock = RegexArray(), + RegexArray&& toWarn = RegexArray(), + RegexArray&& toReport = RegexArray()) + : Handler(std::move(delays), print_data_file_path, std::move(toBlock), std::move(toWarn), std::move(toReport)) { + StartBackgroundThreads(threads); + } + + ~QueuingHandler() override { + // Abort background process and wait for it to finish. + request_queue_.abort(); + WaitForBackgroundThread(); + } + + private: + void OnAnalysisRequested(std::unique_ptr<Event> event) override { + { + time_t now = time(nullptr); + const content_analysis::sdk::ContentAnalysisRequest& request = + event->GetRequest(); + AtomicCout aout; + aout.stream() << std::endl << "Queuing request: " << request.request_token() + << " at " << ctime(&now) << std::endl; + } + + request_queue_.push(std::move(event)); + } + + static void* ProcessRequests(void* qh) { + QueuingHandler* handler = reinterpret_cast<QueuingHandler*>(qh); + + while (true) { + auto event = handler->request_queue_.pop(); + if (!event) + break; + + AtomicCout aout; + aout.stream() << std::endl << "----------" << std::endl; + aout.stream() << "Thread: " << std::this_thread::get_id() << std::endl; + aout.stream() << "Delaying request processing for " + << handler->delays()[handler->nextDelayIndex() % handler->delays().size()] << "s" << std::endl << std::endl; + aout.flush(); + + handler->AnalyzeContent(aout.stream(), std::move(event)); + } + + return 0; + } + + // A list of outstanding content analysis requests. + RequestQueue request_queue_; + + void StartBackgroundThreads(unsigned long threads) { + threads_.reserve(threads); + for (unsigned long i = 0; i < threads; ++i) { + threads_.emplace_back(std::make_unique<std::thread>(ProcessRequests, this)); + } + } + + void WaitForBackgroundThread() { + for (auto& thread : threads_) { + thread->join(); + } + } + + // Thread id of backgrond thread. + std::vector<std::unique_ptr<std::thread>> threads_; +}; + +#endif // CONTENT_ANALYSIS_DEMO_HANDLER_H_ diff --git a/third_party/content_analysis_sdk/demo/handler_misbehaving.h b/third_party/content_analysis_sdk/demo/handler_misbehaving.h new file mode 100644 index 0000000000..d303049d98 --- /dev/null +++ b/third_party/content_analysis_sdk/demo/handler_misbehaving.h @@ -0,0 +1,495 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef CONTENT_ANALYSIS_DEMO_HANDLER_MISBEHAVING_H_ +#define CONTENT_ANALYSIS_DEMO_HANDLER_MISBEHAVING_H_ + +#include <time.h> + +#include <algorithm> +#include <chrono> +#include <fstream> +#include <map> +#include <iostream> +#include <utility> +#include <vector> +#include <regex> +#include <windows.h> + +#include "content_analysis/sdk/analysis.pb.h" +#include "content_analysis/sdk/analysis_agent.h" +#include "agent/src/event_win.h" + +enum class Mode { +// Have to use a "Mode_" prefix to avoid preprocessing problems in StringToMode +#define AGENT_MODE(name) Mode_##name, +#include "modes.h" +#undef AGENT_MODE +}; + +extern std::map<std::string, Mode> sStringToMode; +extern std::map<Mode, std::string> sModeToString; + +// Writes a string to the pipe. Returns ERROR_SUCCESS if successful, else +// returns GetLastError() of the write. This function does not return until +// the entire message has been sent (or an error occurs). +static DWORD WriteBigMessageToPipe(HANDLE pipe, const std::string& message) { + std::cout << "[demo] WriteBigMessageToPipe top, message size is " + << message.size() << std::endl; + if (message.empty()) { + return ERROR_SUCCESS; + } + + OVERLAPPED overlapped; + memset(&overlapped, 0, sizeof(overlapped)); + overlapped.hEvent = CreateEvent(/*securityAttr=*/nullptr, + /*manualReset=*/TRUE, + /*initialState=*/FALSE, + /*name=*/nullptr); + if (overlapped.hEvent == nullptr) { + return GetLastError(); + } + + DWORD err = ERROR_SUCCESS; + const char* cursor = message.data(); + for (DWORD size = message.length(); size > 0;) { + std::cout << "[demo] WriteBigMessageToPipe top of loop, remaining size " + << size << std::endl; + if (WriteFile(pipe, cursor, size, /*written=*/nullptr, &overlapped)) { + std::cout << "[demo] WriteBigMessageToPipe: success" << std::endl; + err = ERROR_SUCCESS; + break; + } + + // If an I/O is not pending, return the error. + err = GetLastError(); + if (err != ERROR_IO_PENDING) { + std::cout + << "[demo] WriteBigMessageToPipe: returning error from WriteFile " + << err << std::endl; + break; + } + + DWORD written; + if (!GetOverlappedResult(pipe, &overlapped, &written, /*wait=*/TRUE)) { + err = GetLastError(); + std::cout << "[demo] WriteBigMessageToPipe: returning error from " + "GetOverlappedREsult " + << err << std::endl; + break; + } + + // reset err for the next loop iteration + err = ERROR_SUCCESS; + std::cout << "[demo] WriteBigMessageToPipe: bottom of loop, wrote " + << written << std::endl; + cursor += written; + size -= written; + } + + CloseHandle(overlapped.hEvent); + return err; +} + +// An AgentEventHandler that does various misbehaving things +class MisbehavingHandler final : public content_analysis::sdk::AgentEventHandler { + public: + using Event = content_analysis::sdk::ContentAnalysisEvent; + + static + std::unique_ptr<AgentEventHandler> Create(unsigned long delay, + const std::string& modeStr) { + auto it = sStringToMode.find(modeStr); + if (it == sStringToMode.end()) { + std::cout << "\"" << modeStr << "\"" + << " is not a valid mode!" << std::endl; + return nullptr; + } + + return std::unique_ptr<AgentEventHandler>(new MisbehavingHandler(delay, it->second)); + } + + private: + MisbehavingHandler(unsigned long delay, Mode mode) : delay_(delay), mode_(mode) {} + + template <size_t N> + DWORD SendBytesOverPipe(const unsigned char (&bytes)[N], + const std::unique_ptr<Event>& event) { + content_analysis::sdk::ContentAnalysisEventWin* eventWin = + static_cast<content_analysis::sdk::ContentAnalysisEventWin*>( + event.get()); + HANDLE pipe = eventWin->Pipe(); + std::string s(reinterpret_cast<const char*>(bytes), N); + return WriteBigMessageToPipe(pipe, s); + } + + // Analyzes one request from Google Chrome and responds back to the browser + // with either an allow or block verdict. + void AnalyzeContent(std::unique_ptr<Event> event) { + // An event represents one content analysis request and response triggered + // by a user action in Google Chrome. The agent determines whether the + // user is allowed to perform the action by examining event->GetRequest(). + // The verdict, which can be "allow" or "block" is written into + // event->GetResponse(). + + std::cout << std::endl << "----------" << std::endl << std::endl; + + DumpRequest(event->GetRequest()); + std::cout << "Mode is " << sModeToString[mode_] << std::endl; + + if (mode_ == Mode::Mode_largeResponse) { + for (size_t i = 0; i < 1000; ++i) { + content_analysis::sdk::ContentAnalysisResponse_Result* result = + event->GetResponse().add_results(); + result->set_tag("someTag"); + content_analysis::sdk::ContentAnalysisResponse_Result_TriggeredRule* + triggeredRule = result->add_triggered_rules(); + triggeredRule->set_rule_id("some_id"); + triggeredRule->set_rule_name("some_name"); + } + } else if (mode_ == + Mode::Mode_invalidUtf8StringStartByteIsContinuationByte) { + // protobuf docs say + // "A string must always contain UTF-8 encoded text." + // So let's try something invalid + // Anything with bits 10xxxxxx is only a continuation code point + event->GetResponse().set_request_token("\x80\x41\x41\x41"); + } else if (mode_ == + Mode::Mode_invalidUtf8StringEndsInMiddleOfMultibyteSequence) { + // f0 byte indicates there should be 3 bytes following it, but here + // there are only 2 + event->GetResponse().set_request_token("\x41\xf0\x90\x8d"); + } else if (mode_ == Mode::Mode_invalidUtf8StringOverlongEncoding) { + // codepoint U+20AC, should be encoded in 3 bytes (E2 82 AC) + // instead of 4 + event->GetResponse().set_request_token("\xf0\x82\x82\xac"); + } else if (mode_ == Mode::Mode_invalidUtf8StringMultibyteSequenceTooShort) { + // f0 byte indicates there should be 3 bytes following it, but here + // there are only 2 (\x41 is not a continuation byte) + event->GetResponse().set_request_token("\xf0\x90\x8d\x41"); + } else if (mode_ == Mode::Mode_invalidUtf8StringDecodesToInvalidCodePoint) { + // decodes to U+1FFFFF, but only up to U+10FFFF is a valid code point + event->GetResponse().set_request_token("\xf7\xbf\xbf\xbf"); + } else if (mode_ == Mode::Mode_stringWithEmbeddedNull) { + event->GetResponse().set_request_token("\x41\x00\x41"); + } else if (mode_ == Mode::Mode_zeroResults) { + event->GetResponse().clear_results(); + } else if (mode_ == Mode::Mode_resultWithInvalidStatus) { + // This causes an assertion failure and the process exits + // So we just serialize this ourselves below + /*content_analysis::sdk::ContentAnalysisResponse_Result* result = + event->GetResponse().mutable_results(0); + result->set_status( + static_cast< + ::content_analysis::sdk::ContentAnalysisResponse_Result_Status>( + 100));*/ + } else { + bool block = false; + + if (event->GetRequest().has_text_content()) { + block = ShouldBlockRequest(event->GetRequest().text_content()); + } else if (event->GetRequest().has_file_path()) { + block = ShouldBlockRequest(event->GetRequest().file_path()); + } + + if (block) { + auto rc = content_analysis::sdk::SetEventVerdictToBlock(event.get()); + std::cout << " Verdict: block"; + if (rc != content_analysis::sdk::ResultCode::OK) { + std::cout << " error: " + << content_analysis::sdk::ResultCodeToString(rc) + << std::endl; + std::cout << " " << event->DebugString() << std::endl; + } + std::cout << std::endl; + } else { + std::cout << " Verdict: allow" << std::endl; + } + } + + std::cout << std::endl; + + // If a delay is specified, wait that much. + if (delay_ > 0) { + std::cout << "[Demo] delaying request processing for " << delay_ << "s" + << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(delay_)); + } + + if (mode_ == Mode::Mode_largeResponse) { + content_analysis::sdk::ContentAnalysisEventWin* eventWin = + static_cast<content_analysis::sdk::ContentAnalysisEventWin*>( + event.get()); + HANDLE pipe = eventWin->Pipe(); + std::cout << "largeResponse about to write" << std::endl; + DWORD result = WriteBigMessageToPipe( + pipe, eventWin->SerializeStringToSendToBrowser()); + std::cout << "largeResponse done writing with error " << result + << std::endl; + eventWin->SetResponseSent(); + } else if (mode_ == Mode::Mode_resultWithInvalidStatus) { + content_analysis::sdk::ContentAnalysisEventWin* eventWin = + static_cast<content_analysis::sdk::ContentAnalysisEventWin*>( + event.get()); + HANDLE pipe = eventWin->Pipe(); + std::string serializedString = eventWin->SerializeStringToSendToBrowser(); + // The last byte is the status value. Set it to 100 + serializedString[serializedString.length() - 1] = 100; + WriteBigMessageToPipe(pipe, serializedString); + } else if (mode_ == Mode::Mode_messageTruncatedInMiddleOfString) { + unsigned char bytes[5]; + bytes[0] = 10; // field 1 (request_token), LEN encoding + bytes[1] = 13; // length 13 + bytes[2] = 65; // "A" + bytes[3] = 66; // "B" + bytes[4] = 67; // "C" + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageWithInvalidWireType) { + unsigned char bytes[5]; + bytes[0] = 15; // field 1 (request_token), "7" encoding (invalid value) + bytes[1] = 3; // length 3 + bytes[2] = 65; // "A" + bytes[3] = 66; // "B" + bytes[4] = 67; // "C" + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageWithUnusedFieldNumber) { + unsigned char bytes[5]; + bytes[0] = 82; // field 10 (this is invalid), LEN encoding + bytes[1] = 3; // length 3 + bytes[2] = 65; // "A" + bytes[3] = 66; // "B" + bytes[4] = 67; // "C" + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageWithWrongStringWireType) { + unsigned char bytes[2]; + bytes[0] = 10; // field 1 (request_token), VARINT encoding (but should be + // a string/LEN) + bytes[1] = 42; // value 42 + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageWithZeroTag) { + unsigned char bytes[1]; + // The protobuf deserialization code seems to handle this + // in a special case. + bytes[0] = 0; + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageWithZeroFieldButNonzeroWireType) { + // The protobuf deserialization code seems to handle this + // in a special case. + unsigned char bytes[5]; + bytes[0] = 2; // field 0 (invalid), LEN encoding + bytes[1] = 3; // length 13 + bytes[2] = 65; // "A" + bytes[3] = 66; // "B" + bytes[4] = 67; // "C" + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageWithGroupEnd) { + // GROUP_ENDs are obsolete and the deserialization code + // handles them in a special case. + unsigned char bytes[1]; + bytes[0] = 12; // field 1 (request_token), GROUP_END encoding + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageTruncatedInMiddleOfVarint) { + unsigned char bytes[2]; + bytes[0] = 16; // field 2 (status), VARINT encoding + bytes[1] = 128; // high bit is set, indicating there + // should be a byte after this + SendBytesOverPipe(bytes, event); + } else if (mode_ == Mode::Mode_messageTruncatedInMiddleOfTag) { + unsigned char bytes[1]; + bytes[0] = 128; // tag is actually encoded as a VARINT, so set the high + // bit, indicating there should be a byte after this + SendBytesOverPipe(bytes, event); + } else { + std::cout << "(misbehaving) Handler::AnalyzeContent() about to call " + "event->Send(), mode is " + << sModeToString[mode_] << std::endl; + // Send the response back to Google Chrome. + auto rc = event->Send(); + if (rc != content_analysis::sdk::ResultCode::OK) { + std::cout << "[Demo] Error sending response: " + << content_analysis::sdk::ResultCodeToString(rc) << std::endl; + std::cout << event->DebugString() << std::endl; + } + } + } + + private: + void OnBrowserConnected( + const content_analysis::sdk::BrowserInfo& info) override { + std::cout << std::endl << "==========" << std::endl; + std::cout << "Browser connected pid=" << info.pid << std::endl; + } + + void OnBrowserDisconnected( + const content_analysis::sdk::BrowserInfo& info) override { + std::cout << std::endl + << "Browser disconnected pid=" << info.pid << std::endl; + std::cout << "==========" << std::endl; + } + + void OnAnalysisRequested(std::unique_ptr<Event> event) override { + // If the agent is capable of analyzing content in the background, the + // events may be handled in background threads. Having said that, a + // event should not be assumed to be thread safe, that is, it should not + // be accessed by more than one thread concurrently. + // + // In this example code, the event is handled synchronously. + AnalyzeContent(std::move(event)); + } + void OnResponseAcknowledged( + const content_analysis::sdk::ContentAnalysisAcknowledgement& ack) + override { + const char* final_action = "<Unknown>"; + if (ack.has_final_action()) { + switch (ack.final_action()) { + case content_analysis::sdk::ContentAnalysisAcknowledgement:: + ACTION_UNSPECIFIED: + final_action = "<Unspecified>"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::ALLOW: + final_action = "Allow"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::REPORT_ONLY: + final_action = "Report only"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::WARN: + final_action = "Warn"; + break; + case content_analysis::sdk::ContentAnalysisAcknowledgement::BLOCK: + final_action = "Block"; + break; + } + } + + std::cout << "Ack: " << ack.request_token() << std::endl; + std::cout << " Final action: " << final_action << std::endl; + } + void OnCancelRequests( + const content_analysis::sdk::ContentAnalysisCancelRequests& cancel) + override { + std::cout << "Cancel: " << std::endl; + std::cout << " User action ID: " << cancel.user_action_id() << std::endl; + } + + void OnInternalError(const char* context, + content_analysis::sdk::ResultCode error) override { + std::cout << std::endl + << "*ERROR*: context=\"" << context << "\" " + << content_analysis::sdk::ResultCodeToString(error) << std::endl; + } + + void DumpRequest( + const content_analysis::sdk::ContentAnalysisRequest& request) { + std::string connector = "<Unknown>"; + if (request.has_analysis_connector()) { + switch (request.analysis_connector()) { + case content_analysis::sdk::FILE_DOWNLOADED: + connector = "download"; + break; + case content_analysis::sdk::FILE_ATTACHED: + connector = "attach"; + break; + case content_analysis::sdk::BULK_DATA_ENTRY: + connector = "bulk-data-entry"; + break; + case content_analysis::sdk::PRINT: + connector = "print"; + break; + case content_analysis::sdk::FILE_TRANSFER: + connector = "file-transfer"; + break; + default: + break; + } + } + + std::string url = + request.has_request_data() && request.request_data().has_url() + ? request.request_data().url() + : "<No URL>"; + + std::string tab_title = + request.has_request_data() && request.request_data().has_tab_title() + ? request.request_data().tab_title() + : "<No tab title>"; + + std::string filename = + request.has_request_data() && request.request_data().has_filename() + ? request.request_data().filename() + : "<No filename>"; + + std::string digest = + request.has_request_data() && request.request_data().has_digest() + ? request.request_data().digest() + : "<No digest>"; + + std::string file_path = + request.has_file_path() ? request.file_path() : "<none>"; + + std::string text_content = + request.has_text_content() ? request.text_content() : "<none>"; + + std::string machine_user = + request.has_client_metadata() && + request.client_metadata().has_browser() && + request.client_metadata().browser().has_machine_user() + ? request.client_metadata().browser().machine_user() + : "<No machine user>"; + + std::string email = + request.has_request_data() && request.request_data().has_email() + ? request.request_data().email() + : "<No email>"; + + time_t t = request.expires_at(); + + std::string user_action_id = request.has_user_action_id() + ? request.user_action_id() + : "<No user action id>"; + + std::cout << "Request: " << request.request_token() << std::endl; + std::cout << " User action ID: " << user_action_id << std::endl; + std::cout << " Expires at: " << ctime(&t); // Returned string includes \n. + std::cout << " Connector: " << connector << std::endl; + std::cout << " URL: " << url << std::endl; + std::cout << " Tab title: " << tab_title << std::endl; + std::cout << " Filename: " << filename << std::endl; + std::cout << " Digest: " << digest << std::endl; + std::cout << " Filepath: " << file_path << std::endl; + std::cout << " Text content: '" << text_content << "'" << std::endl; + std::cout << " Machine user: " << machine_user << std::endl; + std::cout << " Email: " << email << std::endl; + } + + bool ReadContentFromFile(const std::string& file_path, std::string* content) { + std::ifstream file(file_path, + std::ios::in | std::ios::binary | std::ios::ate); + if (!file.is_open()) return false; + + // Get file size. This example does not handle files larger than 1MB. + // Make sure content string can hold the contents of the file. + int size = file.tellg(); + if (size > 1024 * 1024) return false; + + content->resize(size + 1); + + // Read file into string. + file.seekg(0, std::ios::beg); + file.read(&(*content)[0], size); + content->at(size) = 0; + return true; + } + + bool ShouldBlockRequest(const std::string& content) { + // Determines if the request should be blocked. (not needed for the + // misbehaving agent) + std::cout << "'" << content << "' was not blocked\n"; + return false; + } + + unsigned long delay_; + Mode mode_; +}; + +#endif // CONTENT_ANALYSIS_DEMO_HANDLER_MISBEHAVING_H_ diff --git a/third_party/content_analysis_sdk/demo/modes.h b/third_party/content_analysis_sdk/demo/modes.h new file mode 100644 index 0000000000..debefc9d1a --- /dev/null +++ b/third_party/content_analysis_sdk/demo/modes.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +// #define AGENT_MODE(name) to do what you want and then #include this file + +AGENT_MODE(largeResponse) +AGENT_MODE(invalidUtf8StringStartByteIsContinuationByte) +AGENT_MODE(invalidUtf8StringEndsInMiddleOfMultibyteSequence) +AGENT_MODE(invalidUtf8StringOverlongEncoding) +AGENT_MODE(invalidUtf8StringMultibyteSequenceTooShort) +AGENT_MODE(invalidUtf8StringDecodesToInvalidCodePoint) +AGENT_MODE(stringWithEmbeddedNull) +AGENT_MODE(zeroResults) +AGENT_MODE(resultWithInvalidStatus) +AGENT_MODE(messageTruncatedInMiddleOfString) +AGENT_MODE(messageWithInvalidWireType) +AGENT_MODE(messageWithUnusedFieldNumber) +AGENT_MODE(messageWithWrongStringWireType) +AGENT_MODE(messageWithZeroTag) +AGENT_MODE(messageWithZeroFieldButNonzeroWireType) +AGENT_MODE(messageWithGroupEnd) +AGENT_MODE(messageTruncatedInMiddleOfVarint) +AGENT_MODE(messageTruncatedInMiddleOfTag) diff --git a/third_party/content_analysis_sdk/demo/request_queue.h b/third_party/content_analysis_sdk/demo/request_queue.h new file mode 100644 index 0000000000..8615774e2e --- /dev/null +++ b/third_party/content_analysis_sdk/demo/request_queue.h @@ -0,0 +1,70 @@ +// Copyright 2022 The Chromium Authors. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CONTENT_ANALYSIS_DEMO_REQUST_QUEUE_H_ +#define CONTENT_ANALYSIS_DEMO_REQUST_QUEUE_H_ + +#include <condition_variable> +#include <memory> +#include <mutex> +#include <queue> + +#include "content_analysis/sdk/analysis_agent.h" + +// This class maintains a list of outstanding content analysis requests to +// process. Each request is encapsulated in one ContentAnalysisEvent. +// Requests are handled in FIFO order. +class RequestQueue { + public: + using Event = content_analysis::sdk::ContentAnalysisEvent; + + RequestQueue() = default; + virtual ~RequestQueue() = default; + + // Push a new content analysis event into the queue. + void push(std::unique_ptr<Event> event) { + std::lock_guard<std::mutex> lock(mutex_); + + events_.push(std::move(event)); + + // Wake before leaving to prevent unpredicatable scheduling. + cv_.notify_one(); + } + + // Pop the next request from the queue, blocking if necessary until an event + // is available. Returns a nullptr if the queue will produce no more + // events. + std::unique_ptr<Event> pop() { + std::unique_lock<std::mutex> lock(mutex_); + + while (!abort_ && events_.size() == 0) + cv_.wait(lock); + + std::unique_ptr<Event> event; + if (!abort_) { + event = std::move(events_.front()); + events_.pop(); + } + + return event; + } + + // Marks the queue as aborted. pop() will now return nullptr. + void abort() { + std::lock_guard<std::mutex> lg(mutex_); + + abort_ = true; + + // Wake before leaving to prevent unpredicatable scheduling. + cv_.notify_all(); + } + + private: + std::queue<std::unique_ptr<Event>> events_; + std::mutex mutex_; + std::condition_variable cv_; + bool abort_ = false; +}; + +#endif // CONTENT_ANALYSIS_DEMO_REQUST_QUEUE_H_ |