ambari-commits mailing list archives

From aonis...@apache.org
Subject [1/2] ambari git commit: AMBARI-14660. HistoryServer upgrade times out when /app-logs is too large (aonishuk)
Date Fri, 15 Jan 2016 11:52:50 GMT
Repository: ambari
Updated Branches:
  refs/heads/branch-2.2 91d5d0302 -> 27532c2e4
  refs/heads/trunk a14444e6a -> 696c404a2


AMBARI-14660. HistoryServer upgrade times out when /app-logs is too large (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/696c404a
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/696c404a
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/696c404a

Branch: refs/heads/trunk
Commit: 696c404a2e15d3d892faad0af9ad63f35a34dae3
Parents: a14444e
Author: Andrew Onishuk <aonishuk@hortonworks.com>
Authored: Fri Jan 15 13:52:39 2016 +0200
Committer: Andrew Onishuk <aonishuk@hortonworks.com>
Committed: Fri Jan 15 13:52:39 2016 +0200

----------------------------------------------------------------------
 .../libraries/providers/hdfs_resource.py        | 31 ++++++++++++--------
 1 file changed, 19 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
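For context: the new logic relies on a single WebHDFS GETCONTENTSUMMARY request to learn how many files and directories sit under the target path before deciding how to apply ownership/permissions recursively. A call and its JSON response look roughly like this (host, port, path and the numbers are illustrative, not taken from the commit):

  curl "http://<namenode-host>:50070/webhdfs/v1/app-logs?op=GETCONTENTSUMMARY&user.name=hdfs"

  {
    "ContentSummary": {
      "directoryCount": 120,
      "fileCount": 250000,
      "length": 86412750080,
      "quota": -1,
      "spaceConsumed": 259238250240,
      "spaceQuota": -1
    }
  }

With counts like these, the patch skips the per-entry WebHDFS walk and falls back to a single 'hadoop fs -chown/-chmod -R' call, which is what keeps the upgrade from timing out on a large /app-logs tree.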


http://git-wip-us.apache.org/repos/asf/ambari/blob/696c404a/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py
b/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py
index 71c4d5a..ebcf1a4 100644
--- a/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py
+++ b/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py
@@ -219,10 +219,17 @@ class HdfsResourceWebHDFS:
  We should still have the other implementations for such cases.
   """
   
-  # if we have more than this count of files to recursively chmod/chown
-  # webhdfs won't be used, but 'hadoop fs -chmod (or chown) -R ..' As it can really slow.
-  # (in one second ~17 files can be chmoded)
-  MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS = 1000 
+  """
+  If we have more than this count of files to recursively chmod/chown,
+  webhdfs won't be used; 'hadoop fs -chmod (or chown) -R ..' is used instead, as webhdfs can be really slow
+  (~17 files can be chmoded per second).
+  """
+  MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS = 1000
+  """
+  This is used to avoid a lot of LISTSTATUS calls, which can take some time if a directory
+  contains a lot of files. A LISTSTATUS of a directory with 1000 files takes ~0.5 seconds.
+  """
+  MAX_DIRECTORIES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS = 250
   
   def action_execute(self, main_resource):
     pass
@@ -349,12 +356,12 @@ class HdfsResourceWebHDFS:
     results = []
     
     if self.main_resource.resource.recursive_chown:
-      self._fill_directories_list(self.main_resource.resource.target, results)
+      content_summary = self.util.run_command(self.main_resource.resource.target, 'GETCONTENTSUMMARY', method='GET', assertable_result=False)
       
-      # if we don't do this, we can end up waiting real long, having a big result list.
-      if len(results) > HdfsResourceWebHDFS.MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS:
+      if content_summary['ContentSummary']['fileCount'] <= HdfsResourceWebHDFS.MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS and content_summary['ContentSummary']['directoryCount'] <= HdfsResourceWebHDFS.MAX_DIRECTORIES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS:
+        self._fill_directories_list(self.main_resource.resource.target, results)
+      else: # avoid chowning a lot of files and listing a lot of dirs via webhdfs, which can take a lot of time.
         shell.checked_call(["hadoop", "fs", "-chown", "-R", format("{owner}:{group}"), self.main_resource.resource.target],
user=self.main_resource.resource.user)
-        results = []
 
     if self.main_resource.resource.change_permissions_for_parents:
       self._fill_in_parent_directories(self.main_resource.resource.target, results)
@@ -372,12 +379,12 @@ class HdfsResourceWebHDFS:
     results = []
     
     if self.main_resource.resource.recursive_chmod:
-      self._fill_directories_list(self.main_resource.resource.target, results)
+      content_summary = self.util.run_command(self.main_resource.resource.target, 'GETCONTENTSUMMARY', method='GET', assertable_result=False)
       
-      # if we don't do this, we can end up waiting real long, having a big result list.
-      if len(results) > HdfsResourceWebHDFS.MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS:
+      if content_summary['ContentSummary']['fileCount'] <= HdfsResourceWebHDFS.MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS and content_summary['ContentSummary']['directoryCount'] <= HdfsResourceWebHDFS.MAX_DIRECTORIES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS:
+        self._fill_directories_list(self.main_resource.resource.target, results)
+      else: # avoid chmoding a lot of files and listing a lot of dirs via webhdfs, which can take a lot of time.
         shell.checked_call(["hadoop", "fs", "-chmod", "-R", self.mode, self.main_resource.resource.target],
user=self.main_resource.resource.user)
-        results = []
       
     if self.main_resource.resource.change_permissions_for_parents:
       self._fill_in_parent_directories(self.main_resource.resource.target, results)
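
----------------------------------------------------------------------
For illustration only, a minimal standalone sketch of the decision this patch introduces. This is not the provider code itself: namenode_url, target, owner and group are placeholders, the plain urllib/subprocess calls stand in for WebHdfsUtil.run_command() and resource_management's shell.checked_call(), and a secured cluster would additionally need authentication on the WebHDFS request.

import json
import subprocess
import urllib.request

# Thresholds mirroring the ones the patch adds to HdfsResourceWebHDFS.
MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS = 1000
MAX_DIRECTORIES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS = 250

def recursive_chown(namenode_url, target, owner, group):
    # One GETCONTENTSUMMARY request tells us how big the tree is before we
    # commit to walking it entry by entry over WebHDFS.
    url = "{0}/webhdfs/v1{1}?op=GETCONTENTSUMMARY".format(namenode_url, target)
    summary = json.loads(urllib.request.urlopen(url).read())["ContentSummary"]

    small_enough = (
        summary["fileCount"] <= MAX_FILES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS
        and summary["directoryCount"] <= MAX_DIRECTORIES_FOR_RECURSIVE_ACTION_VIA_WEBHDFS)

    if small_enough:
        # Small tree: enumerate it with LISTSTATUS and chown each entry over
        # WebHDFS (the _fill_directories_list path in the provider); omitted here.
        pass
    else:
        # Large tree: a single recursive shell chown is far cheaper than
        # thousands of individual WebHDFS requests (~17 per second).
        subprocess.check_call(
            ["hadoop", "fs", "-chown", "-R",
             "{0}:{1}".format(owner, group), target])

The recursive chmod path in the patch follows the same shape, swapping "-chown" and the owner:group argument for "-chmod" and the mode.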

