kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [kudu] 01/02: KUDU-2705: Add timing information to /scans
Date Thu, 07 Mar 2019 21:23:39 GMT
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 78d2c2f33cd914b3fd392f49bf93a8596724a073
Author: Will Berkeley <wdberkeley@gmail.org>
AuthorDate: Tue Feb 26 14:11:29 2019 -0800

    KUDU-2705: Add timing information to /scans
    
    This patch adds timing information to the /scans page. Specifically, it
    adds time(1)-like timing information for the time spent by a service
    thread working with a scanner, including both initialization and
    continuation of scans.
    
    Screenshot: https://github.com/wdberkeley/kudu/blob/scanspage_screenshot/ss.png
    
    Change-Id: Id30251557ae7f0d97e0d1328bd53511dae7a820b
    Reviewed-on: http://gerrit.cloudera.org:8080/12604
    Reviewed-by: Adar Dembo <adar@cloudera.com>
    Tested-by: Adar Dembo <adar@cloudera.com>
---
 src/kudu/tserver/scanners.cc              |  7 +++++
 src/kudu/tserver/scanners.h               | 43 +++++++++++++++++++++++++++++++
 src/kudu/tserver/tablet_service.cc        | 16 ++++++++----
 src/kudu/tserver/tserver_path_handlers.cc | 17 ++++++++++++
 src/kudu/util/stopwatch.h                 | 12 +++++++++
 www/scans.mustache                        |  4 ++-
 6 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/src/kudu/tserver/scanners.cc b/src/kudu/tserver/scanners.cc
index 3c6342c..ff97d55 100644
--- a/src/kudu/tserver/scanners.cc
+++ b/src/kudu/tserver/scanners.cc
@@ -44,6 +44,7 @@
 #include "kudu/util/logging.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/thread.h"
 
 DEFINE_int32(scanner_ttl_ms, 60000,
@@ -351,6 +352,11 @@ void Scanner::UpdateAccessTime() {
   last_access_time_ = MonoTime::Now();
 }
 
+void Scanner::AddTimings(const CpuTimes& elapsed) {
+  std::lock_guard<simple_spinlock> l(lock_);
+  cpu_times_.Add(elapsed);
+}
+
 void Scanner::Init(unique_ptr<RowwiseIterator> iter,
                    gscoped_ptr<ScanSpec> spec) {
   std::lock_guard<simple_spinlock> l(lock_);
@@ -414,6 +420,7 @@ ScanDescriptor Scanner::descriptor() const {
     std::lock_guard<simple_spinlock> l(lock_);
     descriptor.last_call_seq_id = call_seq_id_;
     descriptor.last_access_time = last_access_time_;
+    descriptor.cpu_times = cpu_times_;
   }
 
   return descriptor;
diff --git a/src/kudu/tserver/scanners.h b/src/kudu/tserver/scanners.h
index 3f03bd3..b014912 100644
--- a/src/kudu/tserver/scanners.h
+++ b/src/kudu/tserver/scanners.h
@@ -45,6 +45,7 @@
 #include "kudu/util/mutex.h"
 #include "kudu/util/oid_generator.h"
 #include "kudu/util/rw_mutex.h"
+#include "kudu/util/stopwatch.h"
 
 namespace kudu {
 
@@ -225,6 +226,9 @@ class Scanner {
   // period.
   void UpdateAccessTime();
 
+  // Add the timings in 'elapsed' to the total timings for this scanner.
+  void AddTimings(const CpuTimes& elapsed);
+
   // Return the auto-release pool which will be freed when this scanner
   // closes. This can be used as a storage area for the ScanSpec and any
   // associated data (eg storage for its predicates).
@@ -380,6 +384,10 @@ class Scanner {
   // this scanner.
   int64_t num_rows_returned_;
 
+  // The cumulative amounts of wall, user cpu, and system cpu time spent on
+  // this scanner, in seconds.
+  CpuTimes cpu_times_;
+
   DISALLOW_COPY_AND_ASSIGN(Scanner);
 };
 
@@ -420,6 +428,41 @@ struct ScanDescriptor {
   MonoTime start_time;
   MonoTime last_access_time;
   uint32_t last_call_seq_id;
+
+  // The cumulative amounts of wall, user cpu, and system cpu time spent on
+  // this scanner, in seconds.
+  CpuTimes cpu_times;
+};
+
+// RAII wrapper to update a scanner with timing information upon scope exit.
+class ScopedAddScannerTiming {
+ public:
+  // 'scanner' must outlive the scoped object.
+  explicit ScopedAddScannerTiming(Scanner* scanner)
+      : stopped_(false),
+        scanner_(scanner) {
+    sw_.start();
+  }
+
+  ~ScopedAddScannerTiming() {
+    if (!stopped_) {
+      Stop();
+    }
+  }
+
+  // Stop the timing and update the scanner.
+  void Stop() {
+    stopped_ = true;
+    sw_.stop();
+    scanner_->AddTimings(sw_.elapsed());
+    scanner_->UpdateAccessTime();
+  }
+
+  bool stopped_;
+  Scanner* scanner_;
+  Stopwatch sw_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedAddScannerTiming);
 };
 
 } // namespace tserver
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index 6f1261f..eb6ecda 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -1984,9 +1984,10 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   // If we early-exit out of this function, automatically unregister
   // the scanner.
   ScopedUnregisterScanner unreg_scanner(server_->scanner_manager(), scanner->id());
+  ScopedAddScannerTiming scanner_timer(scanner.get());
 
   // Create the user's requested projection.
-  // TODO: add test cases for bad projections including 0 columns
+  // TODO(todd): Add test cases for bad projections including 0 columns.
   Schema projection;
   Status s = ColumnPBsToSchema(scan_pb.projected_columns(), &projection);
   if (PREDICT_FALSE(!s.ok())) {
@@ -2167,6 +2168,9 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   }
 
   scanner->Init(std::move(iter), std::move(orig_spec));
+
+  // Stop the scanner timer because ContinueScanRequest starts its own timer.
+  scanner_timer.Stop();
   unreg_scanner.Cancel();
   *scanner_id = scanner->id();
 
@@ -2175,8 +2179,11 @@ Status TabletServiceImpl::HandleNewScanRequest(TabletReplica* replica,
   size_t batch_size_bytes = GetMaxBatchSizeBytesHint(req);
   if (batch_size_bytes > 0) {
     TRACE("Continuing scan request");
-    // TODO: instead of copying the pb, instead split HandleContinueScanRequest
-    // and call the second half directly
+    // TODO(wdberkeley): Instead of copying the pb, instead split
+    // HandleContinueScanRequest and call the second half directly. Once that's
+    // done, remove the call to ScopedAddScannerTiming::Stop() above (and the
+    // method as it won't be used) and start the timing for continue requests
+    // from the first half that is no longer executed in this codepath.
     ScanRequestPB continue_req(*req);
     continue_req.set_scanner_id(scanner->id());
     RETURN_NOT_OK(HandleContinueScanRequest(&continue_req, rpc_context, result_collector,
@@ -2231,6 +2238,7 @@ Status TabletServiceImpl::HandleContinueScanRequest(const ScanRequestPB*
req,
 
   // If we early-exit out of this function, automatically unregister the scanner.
   ScopedUnregisterScanner unreg_scanner(server_->scanner_manager(), scanner->id());
+  ScopedAddScannerTiming scanner_timer(scanner.get());
 
   VLOG(2) << "Found existing scanner " << scanner->id() << " for request:
"
           << SecureShortDebugString(*req);
@@ -2246,7 +2254,6 @@ Status TabletServiceImpl::HandleContinueScanRequest(const ScanRequestPB*
req,
     return Status::InvalidArgument("Invalid call sequence ID in scan request");
   }
   scanner->IncrementCallSeqId();
-  scanner->UpdateAccessTime();
 
   RowwiseIterator* iter = scanner->iter();
 
@@ -2350,7 +2357,6 @@ Status TabletServiceImpl::HandleContinueScanRequest(const ScanRequestPB*
req,
     tablet->metrics()->scanner_bytes_scanned_from_disk->IncrementBy(delta_stats.bytes_read);
   }
 
-  scanner->UpdateAccessTime();
   *has_more_results = !req->close_scanner() && iter->HasNext() &&
       !scanner->has_fulfilled_limit();
   if (*has_more_results) {
diff --git a/src/kudu/tserver/tserver_path_handlers.cc b/src/kudu/tserver/tserver_path_handlers.cc
index 52ec87f..eb858e3 100644
--- a/src/kudu/tserver/tserver_path_handlers.cc
+++ b/src/kudu/tserver/tserver_path_handlers.cc
@@ -64,6 +64,7 @@
 #include "kudu/util/maintenance_manager.pb.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/pb_util.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/url-coding.h"
 #include "kudu/util/web_callback_registry.h"
 
@@ -543,6 +544,14 @@ void ScanToJson(const ScanDescriptor& scan, EasyJson* json) {
   json->Set("time_since_start",
             HumanReadableElapsedTime::ToShortString(time_since_start.ToSeconds()));
 
+  const auto& cpu_times = scan.cpu_times;
+  json->Set("wall_secs",
+            HumanReadableElapsedTime::ToShortString(cpu_times.wall_seconds()));
+  json->Set("user_secs",
+            HumanReadableElapsedTime::ToShortString(cpu_times.user_cpu_seconds()));
+  json->Set("sys_secs",
+            HumanReadableElapsedTime::ToShortString(cpu_times.system_cpu_seconds()));
+
   json->Set("duration_title", duration.ToSeconds());
   json->Set("time_since_start_title", time_since_start.ToSeconds());
 
@@ -551,8 +560,16 @@ void ScanToJson(const ScanDescriptor& scan, EasyJson* json) {
 }
 } // anonymous namespace
 
+const char* kLongTimingTitle = "wall time, user cpu time, and system cpu time "
+    "spent processing the scan: the wall time differs from the duration in that "
+    "it counts the amount of time the server spent (or has spent so far) "
+    "processing the scan, whereas the duration measures the amount of time "
+    "the scanner was (or is so far) open, but possibly dormant waiting for the "
+    "client to request to continue the scan.";
+
 void TabletServerPathHandlers::HandleScansPage(const Webserver::WebRequest& /*req*/,
                                                Webserver::WebResponse* resp) {
+  resp->output->Set("timing_title", kLongTimingTitle);
   EasyJson scans = resp->output->Set("scans", EasyJson::kArray);
   vector<ScanDescriptor> descriptors = tserver_->scanner_manager()->ListScans();
 
diff --git a/src/kudu/util/stopwatch.h b/src/kudu/util/stopwatch.h
index f15b597..52d1851 100644
--- a/src/kudu/util/stopwatch.h
+++ b/src/kudu/util/stopwatch.h
@@ -132,8 +132,20 @@ struct CpuTimes {
   nanosecond_type system;
   int64_t context_switches;
 
+  CpuTimes() {
+    clear();
+  }
+
   void clear() { wall = user = system = context_switches = 0LL; }
 
+  // Add 'other' to this instance in the obvious way.
+  void Add(const CpuTimes& other) {
+    wall += other.wall;
+    user += other.user;
+    system += other.system;
+    context_switches += other.context_switches;
+  }
+
   // Return a string formatted similar to the output of the "time" shell command.
   std::string ToString() const {
     return StringPrintf(
diff --git a/www/scans.mustache b/www/scans.mustache
index 8f2936b..f12705f 100644
--- a/www/scans.mustache
+++ b/www/scans.mustache
@@ -26,8 +26,9 @@ under the License.
       <th>State</th>
       <th title="pseudo-SQL query description">Query</th>
       <th>Requestor</th>
-      <th title="running time of the scan">Duration</th>
+      <th title="amount of time that the scanner was/has been open on the server">Duration</th>
       <th title="elapsed time since the scan started">Time since start</th>
+      <th title="{{timing_title}}">Timing</th>
       <th>Column Stats</th>
     </tr>
   </thead>
@@ -42,6 +43,7 @@ under the License.
       <td><samp>{{requestor}}</samp></td>
       <td title="{{duration_title}}">{{duration}}</td>
       <td title="{{time_since_start_title}}">{{time_since_start}}</td>
+      <td>real: {{wall_secs}} user: {{user_secs}} sys: {{sys_secs}}</td>
 
       <td>
         <table class="table table-striped">


Mime
View raw message