impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
Subject [2/7] incubator-impala git commit: IMPALA-4522: Bound Kudu client threads to avoid stress crash
Date Thu, 24 Nov 2016 08:04:42 GMT
IMPALA-4522: Bound Kudu client threads to avoid stress crash

In stress testing on physical boxes (80 cores, 200 GB RAM) we
discovered that the Kudu Java client creates a huge number
of threads (2x the #cores) per Kudu client, and this was
causing the impalad to crash when the JVM couldn't create
more threads.

This addresses the issue by setting the number of Kudu
client worker threads rather than letting the Kudu client
pick the default (2 * #cores). The number set here was
suggested by the Kudu team as being sufficient for Impala's
FE usage and this has been tested for 8+ hours on the stress
cluster where the crash was previously observed quickly.

In the future, Impala should probably be sharing a single
Kudu client (it is multithreaded), but additional support
from Kudu may be needed to ensure this usage is correct
(e.g. client metadata may need invalidation after some
operations).

Change-Id: I3940df776eaa5ad22e1bbb572559afcc8990bf1d
Reviewed-by: Alex Behm <>
Tested-by: Internal Jenkins


Branch: refs/heads/master
Commit: 1fea9973d2cd4fd61d9377ef9ce4f5accafb41b0
Parents: 3934e13
Author: Matthew Jacobs <>
Authored: Wed Nov 23 11:39:25 2016 -0800
Committer: Internal Jenkins <>
Committed: Thu Nov 24 02:39:30 2016 +0000

 .../java/org/apache/impala/util/KuduUtil.java   | 28 ++++++++++++--------
 1 file changed, 17 insertions(+), 11 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/util/KuduUtil.java b/fe/src/main/java/org/apache/impala/util/KuduUtil.java
index dd09a28..559c4a5 100644
--- a/fe/src/main/java/org/apache/impala/util/KuduUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/KuduUtil.java
@@ -22,37 +22,42 @@ import static java.lang.String.format;
 import java.util.HashSet;
 import java.util.List;
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.ImpalaRuntimeException;
 import org.apache.impala.common.Pair;
 import org.apache.impala.service.BackendConfig;
-import org.apache.impala.thrift.TExpr;
-import org.apache.impala.thrift.TExprNode;
-import org.apache.impala.analysis.LiteralExpr;
-import org.apache.impala.analysis.Expr;
 import org.apache.impala.thrift.TColumn;
 import org.apache.impala.thrift.TColumnEncoding;
+import org.apache.impala.thrift.TExpr;
+import org.apache.impala.thrift.TExprNode;
 import org.apache.impala.thrift.THdfsCompression;
 import org.apache.kudu.ColumnSchema;
-import org.apache.kudu.ColumnSchema.Encoding;
 import org.apache.kudu.ColumnSchema.CompressionAlgorithm;
+import org.apache.kudu.ColumnSchema.Encoding;
 import org.apache.kudu.Schema;
 import org.apache.kudu.client.KuduClient;
 import org.apache.kudu.client.KuduClient.KuduClientBuilder;
 import org.apache.kudu.client.PartialRow;
 import org.apache.kudu.client.RangePartitionBound;
 public class KuduUtil {
   private static final String KUDU_TABLE_NAME_PREFIX = "impala::";
+  // Number of worker threads created by each KuduClient, regardless of whether or not
+  // they're needed. Impala does not share KuduClients between operations, so the number
+  // of threads created can get very large under concurrent workloads. This number should
+  // be sufficient for the Frontend/Catalog use, and has been tested in stress tests.
+  private static int KUDU_CLIENT_WORKER_THREAD_COUNT = 5;
    * Creates a KuduClient with the specified Kudu master addresses (as a comma-separated
    * list of host:port pairs). The 'admin operation timeout' and the 'operation timeout'
@@ -64,6 +69,7 @@ public class KuduUtil {
     KuduClientBuilder b = new KuduClient.KuduClientBuilder(kuduMasters);

View raw message