quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject [02/11] incubator-quickstep git commit: Updates to save blocks and analyze
Date Mon, 11 Dec 2017 22:07:41 GMT
Updates to save blocks and analyze


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/b45cdbc6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/b45cdbc6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/b45cdbc6

Branch: refs/heads/transitive-closure
Commit: b45cdbc63be488aad3211b5146dfe6f8fbfdc024
Parents: df20e4c
Author: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Authored: Mon Nov 27 16:21:03 2017 -0600
Committer: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Committed: Mon Nov 27 16:21:03 2017 -0600

----------------------------------------------------------------------
 catalog/CatalogRelationStatistics.hpp |   9 ++
 cli/CommandExecutor.cpp               | 249 ++++++++++++++++++++++++-----
 cli/Constants.hpp                     |   2 +
 storage/StorageManager.cpp            |  12 +-
 4 files changed, 222 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b45cdbc6/catalog/CatalogRelationStatistics.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationStatistics.hpp b/catalog/CatalogRelationStatistics.hpp
index df95231..55fc747 100644
--- a/catalog/CatalogRelationStatistics.hpp
+++ b/catalog/CatalogRelationStatistics.hpp
@@ -68,6 +68,15 @@ class CatalogRelationStatistics {
   serialization::CatalogRelationStatistics getProto() const;
 
   /**
+   * @brief Clear all statistics.
+   */
+  void clear() {
+    num_tuples_ = kNullValue;
+    column_stats_.clear();
+    is_exact_ = true;
+  }
+
+  /**
    * @brief Check whether the statistics are exact for the relation.
    *
    * return True if the statistics are exact for the relation, false otherwise.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b45cdbc6/cli/CommandExecutor.cpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.cpp b/cli/CommandExecutor.cpp
index 6a84672..7976d7d 100644
--- a/cli/CommandExecutor.cpp
+++ b/cli/CommandExecutor.cpp
@@ -201,9 +201,28 @@ void ExecuteAnalyze(const PtrVector<ParseString> &arguments,
     CatalogRelationStatistics *mutable_stat =
         mutable_relation->getStatisticsMutable();
 
+    mutable_stat->clear();
+
     const std::string rel_name = EscapeQuotes(relation.getName(), '"');
 
-    // Get the number of distinct values for each column.
+    // Get the number of tuples for the relation.
+    std::string query_string = "SELECT COUNT(*) FROM \"";
+    query_string.append(rel_name);
+    query_string.append("\";");
+
+    TypedValue num_tuples =
+        ExecuteQueryForSingleResult(main_thread_client_id,
+                                    foreman_client_id,
+                                    query_string,
+                                    bus,
+                                    storage_manager,
+                                    query_processor,
+                                    parser_wrapper.get());
+
+    DCHECK_EQ(TypeID::kLong, num_tuples.getTypeID());
+    mutable_stat->setNumTuples(num_tuples.getLiteral<std::int64_t>());
+
+    // Get the min/max values for each column.
     for (const CatalogAttribute &attribute : relation) {
       const std::string attr_name = EscapeQuotes(attribute.getName(), '"');
       const Type &attr_type = attribute.getType();
@@ -211,24 +230,15 @@ void ExecuteAnalyze(const PtrVector<ParseString> &arguments,
           AggregateFunctionMin::Instance().canApplyToTypes({&attr_type});
       bool is_max_applicable =
           AggregateFunctionMax::Instance().canApplyToTypes({&attr_type});
+      if (!is_min_applicable || !is_max_applicable) {
+        continue;
+      }
 
-      // NOTE(jianqiao): Note that the relation name and the attribute names may
-      // contain non-letter characters, e.g. CREATE TABLE "with space"("1" int).
-      // So here we need to format the names with double quotes (").
-      std::string query_string = "SELECT COUNT(DISTINCT \"";
+      std::string query_string = "SELECT MIN(\"";
       query_string.append(attr_name);
-      query_string.append("\")");
-      if (is_min_applicable) {
-        query_string.append(", MIN(\"");
-        query_string.append(attr_name);
-        query_string.append("\")");
-      }
-      if (is_max_applicable) {
-        query_string.append(", MAX(\"");
-        query_string.append(attr_name);
-        query_string.append("\")");
-      }
-      query_string.append(" FROM \"");
+      query_string.append("\"), MAX(\"");
+      query_string.append(attr_name);
+      query_string.append("\") FROM \"");
       query_string.append(rel_name);
       query_string.append("\";");
 
@@ -240,42 +250,181 @@ void ExecuteAnalyze(const PtrVector<ParseString> &arguments,
                                    storage_manager,
                                    query_processor,
                                    parser_wrapper.get());
-
-      auto results_it = results.begin();
-      DCHECK_EQ(TypeID::kLong, results_it->getTypeID());
+      DCHECK_EQ(2u, results.size());
 
       const attribute_id attr_id = attribute.getID();
-      mutable_stat->setNumDistinctValues(attr_id,
-                                         results_it->getLiteral<std::int64_t>());
-      if (is_min_applicable) {
-        ++results_it;
-        mutable_stat->setMinValue(attr_id, *results_it);
+      mutable_stat->setMinValue(attr_id, results[0]);
+      mutable_stat->setMaxValue(attr_id, results[1]);
+    }
+
+    // Get the number of distinct values for each column.
+    for (const CatalogAttribute &attribute : relation) {
+      const std::string attr_name = EscapeQuotes(attribute.getName(), '"');
+
+      std::string query_string = "SELECT COUNT(*) FROM (SELECT \"";
+      query_string.append(attr_name);
+      query_string.append("\" FROM \"");
+      query_string.append(rel_name);
+      query_string.append("\" GROUP BY \"");
+      query_string.append(attr_name);
+      query_string.append("\") t;");
+
+      TypedValue num_distinct_values =
+          ExecuteQueryForSingleResult(main_thread_client_id,
+                                      foreman_client_id,
+                                      query_string,
+                                      bus,
+                                      storage_manager,
+                                      query_processor,
+                                      parser_wrapper.get());
+
+      DCHECK_EQ(TypeID::kLong, num_distinct_values.getTypeID());
+      mutable_stat->setNumDistinctValues(
+          attribute.getID(), num_distinct_values.getLiteral<std::int64_t>());
+    }
+
+    fprintf(out, "done\n");
+    fflush(out);
+  }
+  query_processor->markCatalogAltered();
+  query_processor->saveCatalog();
+}
+
+void ExecuteAnalyzeRange(const PtrVector<ParseString> &arguments,
+                         const tmb::client_id main_thread_client_id,
+                         const tmb::client_id foreman_client_id,
+                         MessageBus *bus,
+                         StorageManager *storage_manager,
+                         QueryProcessor *query_processor,
+                         FILE *out) {
+  const CatalogDatabase &database = *query_processor->getDefaultDatabase();
+
+  std::unique_ptr<SqlParserWrapper> parser_wrapper(new SqlParserWrapper());
+  std::vector<std::reference_wrapper<const CatalogRelation>> relations;
+  if (arguments.empty()) {
+    relations.insert(relations.begin(), database.begin(), database.end());
+  } else {
+    for (const auto &rel_name : arguments) {
+      const CatalogRelation *rel = database.getRelationByName(rel_name.value());
+      if (rel == nullptr) {
+        THROW_SQL_ERROR_AT(&rel_name) << "Table does not exist";
+      } else {
+        relations.emplace_back(*rel);
+      }
+    }
+  }
+
+  // Analyze each relation in the database.
+  for (const CatalogRelation &relation : relations) {
+    fprintf(out, "Analyzing %s ... ", relation.getName().c_str());
+    fflush(out);
+
+    CatalogRelation *mutable_relation =
+        query_processor->getDefaultDatabase()->getRelationByIdMutable(relation.getID());
+    CatalogRelationStatistics *mutable_stat =
+        mutable_relation->getStatisticsMutable();
+
+    if (!mutable_stat->isExact()) {
+      mutable_stat->clear();
+
+      const std::string rel_name = EscapeQuotes(relation.getName(), '"');
+
+      for (const CatalogAttribute &attribute : relation) {
+        const std::string attr_name = EscapeQuotes(attribute.getName(), '"');
+        const Type &attr_type = attribute.getType();
+        bool is_min_applicable =
+            AggregateFunctionMin::Instance().canApplyToTypes({&attr_type});
+        bool is_max_applicable =
+            AggregateFunctionMax::Instance().canApplyToTypes({&attr_type});
+        if (!is_min_applicable || !is_max_applicable) {
+          continue;
+        }
+
+        std::string query_string = "SELECT MIN(\"";
+        query_string.append(attr_name);
+        query_string.append("\"), MAX(\"");
+        query_string.append(attr_name);
+        query_string.append("\") FROM \"");
+        query_string.append(rel_name);
+        query_string.append("\";");
+
+        std::vector<TypedValue> results =
+            ExecuteQueryForSingleRow(main_thread_client_id,
+                                     foreman_client_id,
+                                     query_string,
+                                     bus,
+                                     storage_manager,
+                                     query_processor,
+                                     parser_wrapper.get());
+        DCHECK_EQ(2u, results.size());
+
+        const attribute_id attr_id = attribute.getID();
+        mutable_stat->setMinValue(attr_id, results[0]);
+        mutable_stat->setMaxValue(attr_id, results[1]);
       }
-      if (is_max_applicable) {
-        ++results_it;
-        mutable_stat->setMaxValue(attr_id, *results_it);
+    }
+    fprintf(out, "done\n");
+    fflush(out);
+  }
+  query_processor->markCatalogAltered();
+  query_processor->saveCatalog();
+}
+
+void ExecuteAnalyzeCount(const PtrVector<ParseString> &arguments,
+                         const tmb::client_id main_thread_client_id,
+                         const tmb::client_id foreman_client_id,
+                         MessageBus *bus,
+                         StorageManager *storage_manager,
+                         QueryProcessor *query_processor,
+                         FILE *out) {
+  const CatalogDatabase &database = *query_processor->getDefaultDatabase();
+
+  std::unique_ptr<SqlParserWrapper> parser_wrapper(new SqlParserWrapper());
+  std::vector<std::reference_wrapper<const CatalogRelation>> relations;
+  if (arguments.empty()) {
+    relations.insert(relations.begin(), database.begin(), database.end());
+  } else {
+    for (const auto &rel_name : arguments) {
+      const CatalogRelation *rel = database.getRelationByName(rel_name.value());
+      if (rel == nullptr) {
+        THROW_SQL_ERROR_AT(&rel_name) << "Table does not exist";
+      } else {
+        relations.emplace_back(*rel);
       }
     }
+  }
 
-    // Get the number of tuples for the relation.
-    std::string query_string = "SELECT COUNT(*) FROM \"";
-    query_string.append(rel_name);
-    query_string.append("\";");
+  // Analyze each relation in the database.
+  for (const CatalogRelation &relation : relations) {
+    fprintf(out, "Analyzing %s ... ", relation.getName().c_str());
+    fflush(out);
 
-    TypedValue num_tuples =
-        ExecuteQueryForSingleResult(main_thread_client_id,
-                                    foreman_client_id,
-                                    query_string,
-                                    bus,
-                                    storage_manager,
-                                    query_processor,
-                                    parser_wrapper.get());
+    CatalogRelation *mutable_relation =
+        query_processor->getDefaultDatabase()->getRelationByIdMutable(relation.getID());
+    CatalogRelationStatistics *mutable_stat =
+        mutable_relation->getStatisticsMutable();
 
-    DCHECK_EQ(TypeID::kLong, num_tuples.getTypeID());
-    mutable_stat->setNumTuples(num_tuples.getLiteral<std::int64_t>());
+    if (!mutable_stat->isExact()) {
+      mutable_stat->clear();
 
-    mutable_stat->setExactness(true);
+      // Get the number of tuples for the relation.
+      std::string query_string = "SELECT COUNT(*) FROM \"";
+      query_string.append(EscapeQuotes(relation.getName(), '"'));
+      query_string.append("\";");
 
+      TypedValue num_tuples =
+          ExecuteQueryForSingleResult(main_thread_client_id,
+                                      foreman_client_id,
+                                      query_string,
+                                      bus,
+                                      storage_manager,
+                                      query_processor,
+                                      parser_wrapper.get());
+
+      DCHECK_EQ(TypeID::kLong, num_tuples.getTypeID());
+      mutable_stat->setNumTuples(num_tuples.getLiteral<std::int64_t>());
+
+    }
     fprintf(out, "done\n");
     fflush(out);
   }
@@ -314,6 +463,20 @@ void executeCommand(const ParseStatement &statement,
                    bus,
                    storage_manager,
                    query_processor, out);
+  } else if (command_str == kAnalyzeRangeCommand) {
+    ExecuteAnalyzeRange(arguments,
+                        main_thread_client_id,
+                        foreman_client_id,
+                        bus,
+                        storage_manager,
+                        query_processor, out);
+  } else if (command_str == kAnalyzeCountCommand) {
+    ExecuteAnalyzeCount(arguments,
+                        main_thread_client_id,
+                        foreman_client_id,
+                        bus,
+                        storage_manager,
+                        query_processor, out);
   } else {
     THROW_SQL_ERROR_AT(command.command()) << "Invalid Command";
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b45cdbc6/cli/Constants.hpp
----------------------------------------------------------------------
diff --git a/cli/Constants.hpp b/cli/Constants.hpp
index 0b4a37b..8934b03 100644
--- a/cli/Constants.hpp
+++ b/cli/Constants.hpp
@@ -30,6 +30,8 @@ namespace cli {
 constexpr char kDescribeDatabaseCommand[] = "\\dt";
 constexpr char kDescribeTableCommand[] = "\\d";
 constexpr char kAnalyzeCommand[] = "\\analyze";
+constexpr char kAnalyzeRangeCommand[] = "\\analyzerange";
+constexpr char kAnalyzeCountCommand[] = "\\analyzecount";
 
 /** @} */
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b45cdbc6/storage/StorageManager.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageManager.cpp b/storage/StorageManager.cpp
index 5924607..78db0cd 100644
--- a/storage/StorageManager.cpp
+++ b/storage/StorageManager.cpp
@@ -271,9 +271,7 @@ StorageManager::~StorageManager() {
        it != blocks_.end();
        ++it) {
     if (it->second.block->isDirty()) {
-      LOG(WARNING) << (it->second.block->isBlob() ? "Blob " : "Block ")
-                   << "with ID " << BlockIdUtil::ToString(it->first)
-                   << " is dirty during StorageManager shutdown";
+      saveBlockOrBlob(it->first, true);
     }
     delete it->second.block;
     deallocateSlots(it->second.block_memory, it->second.block_memory_size);
@@ -393,6 +391,10 @@ bool StorageManager::saveBlockOrBlob(const block_id block, const bool
force) {
   // particular entry in 'blocks_' for the specified 'block'. If and when we
   // switch blocks_ to something with more fine-grained locking, this should
   // be revisited.
+  if (!force) {
+    return true;
+  }
+
   SpinSharedMutexSharedLock<false> read_lock(blocks_shared_mutex_);
 
   std::unordered_map<block_id, BlockHandle>::iterator block_it = blocks_.find(block);
@@ -400,10 +402,6 @@ bool StorageManager::saveBlockOrBlob(const block_id block, const bool
force) {
     return false;
   }
 
-  if (!(force || block_it->second.block->isDirty())) {
-    return true;
-  }
-
   bool res = file_manager_->writeBlockOrBlob(block,
                                              block_it->second.block_memory,
                                              kSlotSizeBytes * (block_it->second.block_memory_size));


Mime
View raw message