summaryrefslogtreecommitdiffstats
path: root/src/jaegertracing/thrift/contrib/fb303/cpp/ServiceTracker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/jaegertracing/thrift/contrib/fb303/cpp/ServiceTracker.cpp')
-rw-r--r--src/jaegertracing/thrift/contrib/fb303/cpp/ServiceTracker.cpp481
1 files changed, 481 insertions, 0 deletions
diff --git a/src/jaegertracing/thrift/contrib/fb303/cpp/ServiceTracker.cpp b/src/jaegertracing/thrift/contrib/fb303/cpp/ServiceTracker.cpp
new file mode 100644
index 000000000..7a61b21a9
--- /dev/null
+++ b/src/jaegertracing/thrift/contrib/fb303/cpp/ServiceTracker.cpp
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <sys/time.h>
+
+#include "FacebookBase.h"
+#include "ServiceTracker.h"
+#include <thrift/concurrency/ThreadManager.h>
+
+using namespace std;
+using namespace facebook::fb303;
+using namespace apache::thrift::concurrency;
+
+
+uint64_t ServiceTracker::CHECKPOINT_MINIMUM_INTERVAL_SECONDS = 60;
+int ServiceTracker::LOG_LEVEL = 5;
+
+
+ServiceTracker::ServiceTracker(facebook::fb303::FacebookBase *handler,
+ void (*logMethod)(int, const string &),
+ bool featureCheckpoint,
+ bool featureStatusCheck,
+ bool featureThreadCheck,
+ Stopwatch::Unit stopwatchUnit)
+ : handler_(handler), logMethod_(logMethod),
+ featureCheckpoint_(featureCheckpoint),
+ featureStatusCheck_(featureStatusCheck),
+ featureThreadCheck_(featureThreadCheck),
+ stopwatchUnit_(stopwatchUnit),
+ checkpointServices_(0)
+{
+ if (featureCheckpoint_) {
+ time_t now = time(NULL);
+ checkpointTime_ = now;
+ } else {
+ checkpointTime_ = 0;
+ }
+}
+
+/**
+ * Registers the beginning of a "service method": basically, any of
+ * the implementations of Thrift remote procedure calls that a
+ * FacebookBase handler is handling. Controls concurrent
+ * services and reports statistics (via log and via fb303 counters).
+ * Throws an exception if the server is not ready to handle service
+ * methods yet.
+ *
+ * note: The relationship between startService() and finishService()
+ * is currently defined so that a call to finishService() should only
+ * be matched to this call to startService() if this method returns
+ * without exception. It wouldn't be a problem to implement things
+ * the other way, so that *every* start needed a finish, but this
+ * convention was chosen to match the way an object's constructor and
+ * destructor work together, i.e. to work well with ServiceMethod
+ * objects.
+ *
+ * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod
+ * object instantiated at the start
+ * of the service method.
+ */
+void
+ServiceTracker::startService(const ServiceMethod &serviceMethod)
+{
+ // note: serviceMethod.timer_ automatically starts at construction.
+
+ // log service start
+ logMethod_(5, serviceMethod.signature_);
+
+ // check handler ready
+ if (featureStatusCheck_ && !serviceMethod.featureLogOnly_) {
+ // note: Throwing exceptions before counting statistics. See note
+ // in method header.
+ // note: A STOPPING server is not accepting new connections, but it
+ // is still handling any already-connected threads -- so from the
+ // service method's point of view, a status of STOPPING is a green
+ // light.
+ facebook::fb303::fb_status status = handler_->getStatus();
+ if (status != facebook::fb303::ALIVE
+ && status != facebook::fb303::STOPPING) {
+ if (status == facebook::fb303::STARTING) {
+ throw ServiceException("Server starting up; please try again later");
+ } else {
+ throw ServiceException("Server not alive; please try again later");
+ }
+ }
+ }
+
+ // check server threads
+ if (featureThreadCheck_ && !serviceMethod.featureLogOnly_) {
+ // note: Might want to put these messages in reportCheckpoint() if
+ // log is getting spammed.
+ if (threadManager_ != NULL) {
+ size_t idle_count = threadManager_->idleWorkerCount();
+ if (idle_count == 0) {
+ stringstream message;
+ message << "service " << serviceMethod.signature_
+ << ": all threads (" << threadManager_->workerCount()
+ << ") in use";
+ logMethod_(3, message.str());
+ }
+ }
+ }
+}
+
+/**
+ * Logs a significant step in the middle of a "service method"; see
+ * startService.
+ *
+ * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod
+ * object instantiated at the start
+ * of the service method.
+ * @return int64_t Elapsed units (see stopwatchUnit_) since ServiceMethod
+ * instantiation.
+ */
+int64_t
+ServiceTracker::stepService(const ServiceMethod &serviceMethod,
+ const string &stepName)
+{
+ stringstream message;
+ string elapsed_label;
+ int64_t elapsed = serviceMethod.timer_.elapsedUnits(stopwatchUnit_,
+ &elapsed_label);
+ message << serviceMethod.signature_
+ << ' ' << stepName
+ << " [" << elapsed_label << ']';
+ logMethod_(5, message.str());
+ return elapsed;
+}
+
+/**
+ * Registers the end of a "service method"; see startService().
+ *
+ * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod
+ * object instantiated at the start
+ * of the service method.
+ */
+void
+ServiceTracker::finishService(const ServiceMethod &serviceMethod)
+{
+ // log end of service
+ stringstream message;
+ string duration_label;
+ int64_t duration = serviceMethod.timer_.elapsedUnits(stopwatchUnit_,
+ &duration_label);
+ message << serviceMethod.signature_
+ << " finish [" << duration_label << ']';
+ logMethod_(5, message.str());
+
+ // count, record, and maybe report service statistics
+ if (!serviceMethod.featureLogOnly_) {
+
+ if (!featureCheckpoint_) {
+
+ // lifetime counters
+ // (note: No need to lock statisticsMutex_ if not doing checkpoint;
+ // FacebookService::incrementCounter() is already thread-safe.)
+ handler_->incrementCounter("lifetime_services");
+
+ } else {
+
+ statisticsMutex_.lock();
+ // note: No exceptions expected from this code block. Wrap in a try
+ // just to be safe.
+ try {
+
+ // lifetime counters
+ // note: Good to synchronize this with the increment of
+ // checkpoint services, even though incrementCounter() is
+ // already thread-safe, for the sake of checkpoint reporting
+ // consistency (i.e. since the last checkpoint,
+ // lifetime_services has incremented by checkpointServices_).
+ handler_->incrementCounter("lifetime_services");
+
+ // checkpoint counters
+ checkpointServices_++;
+ checkpointDuration_ += duration;
+
+ // per-service timing
+ // note kjv: According to my tests it is very slightly faster to
+ // call insert() once (and detect not-found) than calling find()
+ // and then maybe insert (if not-found). However, the difference
+ // is tiny for small maps like this one, and the code for the
+ // faster solution is slightly less readable. Also, I wonder if
+ // the instantiation of the (often unused) pair to insert makes
+ // the first algorithm slower after all.
+ map<string, pair<uint64_t, uint64_t> >::iterator iter;
+ iter = checkpointServiceDuration_.find(serviceMethod.name_);
+ if (iter != checkpointServiceDuration_.end()) {
+ iter->second.first++;
+ iter->second.second += duration;
+ } else {
+ checkpointServiceDuration_.insert(make_pair(serviceMethod.name_,
+ make_pair(1, duration)));
+ }
+
+ // maybe report checkpoint
+ // note: ...if it's been long enough since the last report.
+ time_t now = time(NULL);
+ uint64_t check_interval = now - checkpointTime_;
+ if (check_interval >= CHECKPOINT_MINIMUM_INTERVAL_SECONDS) {
+ reportCheckpoint();
+ }
+
+ } catch (...) {
+ statisticsMutex_.unlock();
+ throw;
+ }
+ statisticsMutex_.unlock();
+
+ }
+ }
+}
+
+/**
+ * Logs some statistics gathered since the last call to this method.
+ *
+ * note: Thread race conditions on this method could cause
+ * misreporting and/or undefined behavior; the caller must protect
+ * uses of the object variables (and calls to this method) with a
+ * mutex.
+ *
+ */
+void
+ServiceTracker::reportCheckpoint()
+{
+ time_t now = time(NULL);
+
+ uint64_t check_count = checkpointServices_;
+ uint64_t check_interval = now - checkpointTime_;
+ uint64_t check_duration = checkpointDuration_;
+
+ // export counters for timing of service methods (by service name)
+ handler_->setCounter("checkpoint_time", check_interval);
+ map<string, pair<uint64_t, uint64_t> >::iterator iter;
+ uint64_t count;
+ for (iter = checkpointServiceDuration_.begin();
+ iter != checkpointServiceDuration_.end();
+ ++iter) {
+ count = iter->second.first;
+ handler_->setCounter(string("checkpoint_count_") + iter->first, count);
+ if (count == 0) {
+ handler_->setCounter(string("checkpoint_speed_") + iter->first,
+ 0);
+ } else {
+ handler_->setCounter(string("checkpoint_speed_") + iter->first,
+ iter->second.second / count);
+ }
+ }
+
+ // reset checkpoint variables
+ // note: Clearing the map while other threads are using it might
+ // cause undefined behavior.
+ checkpointServiceDuration_.clear();
+ checkpointTime_ = now;
+ checkpointServices_ = 0;
+ checkpointDuration_ = 0;
+
+ // get lifetime variables
+ uint64_t life_count = handler_->getCounter("lifetime_services");
+ uint64_t life_interval = now - handler_->aliveSince();
+
+ // log checkpoint
+ stringstream message;
+ message << "checkpoint_time:" << check_interval
+ << " checkpoint_services:" << check_count
+ << " checkpoint_speed_sum:" << check_duration
+ << " lifetime_time:" << life_interval
+ << " lifetime_services:" << life_count;
+ if (featureThreadCheck_ && threadManager_ != NULL) {
+ size_t worker_count = threadManager_->workerCount();
+ size_t idle_count = threadManager_->idleWorkerCount();
+ message << " total_workers:" << worker_count
+ << " active_workers:" << (worker_count - idle_count);
+ }
+ logMethod_(4, message.str());
+}
+
+/**
+ * Remembers the thread manager used in the server, for monitoring thread
+ * activity.
+ *
+ * @param shared_ptr<ThreadManager> threadManager The server's thread manager.
+ */
+void
+ServiceTracker::setThreadManager(boost::shared_ptr<ThreadManager>
+ threadManager)
+{
+ threadManager_ = threadManager;
+}
+
+/**
+ * Logs messages to stdout; the passed message will be logged if the
+ * passed level is less than or equal to LOG_LEVEL.
+ *
+ * This is the default logging method used by the ServiceTracker. An
+ * alternate logging method (that accepts the same parameters) may be
+ * specified to the constructor.
+ *
+ * @param int level A level associated with the message: higher levels
+ * are used to indicate higher levels of detail.
+ * @param string message The message to log.
+ */
+void
+ServiceTracker::defaultLogMethod(int level, const string &message)
+{
+ if (level <= LOG_LEVEL) {
+ string level_string;
+ time_t now = time(NULL);
+ char now_pretty[26];
+ ctime_r(&now, now_pretty);
+ now_pretty[24] = '\0';
+ switch (level) {
+ case 1:
+ level_string = "CRITICAL";
+ break;
+ case 2:
+ level_string = "ERROR";
+ break;
+ case 3:
+ level_string = "WARNING";
+ break;
+ case 5:
+ level_string = "DEBUG";
+ break;
+ case 4:
+ default:
+ level_string = "INFO";
+ break;
+ }
+ cout << '[' << level_string << "] [" << now_pretty << "] "
+ << message << endl;
+ }
+}
+
+
+/**
+ * Creates a Stopwatch, which can report the time elapsed since its
+ * creation.
+ *
+ */
+Stopwatch::Stopwatch()
+{
+ gettimeofday(&startTime_, NULL);
+}
+
+void
+Stopwatch::reset()
+{
+ gettimeofday(&startTime_, NULL);
+}
+
+uint64_t
+Stopwatch::elapsedUnits(Stopwatch::Unit unit, string *label) const
+{
+ timeval now_time;
+ gettimeofday(&now_time, NULL);
+ time_t duration_secs = now_time.tv_sec - startTime_.tv_sec;
+
+ uint64_t duration_units;
+ switch (unit) {
+ case UNIT_SECONDS:
+ duration_units = duration_secs
+ + (now_time.tv_usec - startTime_.tv_usec + 500000) / 1000000;
+ if (NULL != label) {
+ stringstream ss_label;
+ ss_label << duration_units << " secs";
+ label->assign(ss_label.str());
+ }
+ break;
+ case UNIT_MICROSECONDS:
+ duration_units = duration_secs * 1000000
+ + now_time.tv_usec - startTime_.tv_usec;
+ if (NULL != label) {
+ stringstream ss_label;
+ ss_label << duration_units << " us";
+ label->assign(ss_label.str());
+ }
+ break;
+ case UNIT_MILLISECONDS:
+ default:
+ duration_units = duration_secs * 1000
+ + (now_time.tv_usec - startTime_.tv_usec + 500) / 1000;
+ if (NULL != label) {
+ stringstream ss_label;
+ ss_label << duration_units << " ms";
+ label->assign(ss_label.str());
+ }
+ break;
+ }
+ return duration_units;
+}
+
+/**
+ * Creates a ServiceMethod, used for tracking a single service method
+ * invocation (via the ServiceTracker). The passed name of the
+ * ServiceMethod is used to group statistics (e.g. counts and durations)
+ * for similar invocations; the passed signature is used to uniquely
+ * identify the particular invocation in the log.
+ *
+ * note: A version of this constructor is provided that automatically
+ * forms a signature the name and a passed numeric id. Silly, sure,
+ * but commonly used, since it often saves the caller a line or two of
+ * code.
+ *
+ * @param ServiceTracker *tracker The service tracker that will track this
+ * ServiceMethod.
+ * @param const string &name The service method name (usually independent
+ * of service method parameters).
+ * @param const string &signature A signature uniquely identifying the method
+ * invocation (usually name plus parameters).
+ */
+ServiceMethod::ServiceMethod(ServiceTracker *tracker,
+ const string &name,
+ const string &signature,
+ bool featureLogOnly)
+ : tracker_(tracker), name_(name), signature_(signature),
+ featureLogOnly_(featureLogOnly)
+{
+ // note: timer_ automatically starts at construction.
+
+ // invoke tracker to start service
+ // note: Might throw. If it throws, then this object's destructor
+ // won't be called, which is according to plan: finishService() is
+ // only supposed to be matched to startService() if startService()
+ // returns without error.
+ tracker_->startService(*this);
+}
+
+ServiceMethod::ServiceMethod(ServiceTracker *tracker,
+ const string &name,
+ uint64_t id,
+ bool featureLogOnly)
+ : tracker_(tracker), name_(name), featureLogOnly_(featureLogOnly)
+{
+ // note: timer_ automatically starts at construction.
+ stringstream ss_signature;
+ ss_signature << name << " (" << id << ')';
+ signature_ = ss_signature.str();
+
+ // invoke tracker to start service
+ // note: Might throw. If it throws, then this object's destructor
+ // won't be called, which is according to plan: finishService() is
+ // only supposed to be matched to startService() if startService()
+ // returns without error.
+ tracker_->startService(*this);
+}
+
+ServiceMethod::~ServiceMethod()
+{
+ // invoke tracker to finish service
+ // note: Not expecting an exception from this code, but
+ // finishService() might conceivably throw an out-of-memory
+ // exception.
+ try {
+ tracker_->finishService(*this);
+ } catch (...) {
+ // don't throw
+ }
+}
+
+uint64_t
+ServiceMethod::step(const std::string &stepName)
+{
+ return tracker_->stepService(*this, stepName);
+}