From commits-return-127236-archive-asf-public=cust-asf.ponee.io@ignite.apache.org Fri Oct 2 06:44:06 2020 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mxout1-he-de.apache.org (mxout1-he-de.apache.org [95.216.194.37]) by mx-eu-01.ponee.io (Postfix) with ESMTPS id 67556180630 for ; Fri, 2 Oct 2020 08:44:06 +0200 (CEST) Received: from mail.apache.org (mailroute1-lw-us.apache.org [207.244.88.153]) by mxout1-he-de.apache.org (ASF Mail Server at mxout1-he-de.apache.org) with SMTP id 89A0E63F52 for ; Fri, 2 Oct 2020 06:44:05 +0000 (UTC) Received: (qmail 53240 invoked by uid 500); 2 Oct 2020 06:44:04 -0000 Mailing-List: contact commits-help@ignite.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ignite.apache.org Delivered-To: mailing list commits@ignite.apache.org Received: (qmail 53231 invoked by uid 99); 2 Oct 2020 06:44:04 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 02 Oct 2020 06:44:04 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 81AA480873; Fri, 2 Oct 2020 06:44:04 +0000 (UTC) Date: Fri, 02 Oct 2020 06:44:03 +0000 To: "commits@ignite.apache.org" Subject: [ignite] branch IGNITE-7595 updated: Donated the Performance and Troubleshooting Guide from the GridGain to Ignite docs MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <160162104292.21263.13217640602277731198@gitbox.apache.org> From: dmagda@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: ignite X-Git-Refname: refs/heads/IGNITE-7595 X-Git-Reftype: branch X-Git-Oldrev: b8dfebc2b98208db73522d5c2af74105ae0829e7 X-Git-Newrev: 17c2e1bdfca0b327aac1a2d0adf973ea17505ac8 X-Git-Rev: 17c2e1bdfca0b327aac1a2d0adf973ea17505ac8 X-Git-NotificationType: ref_changed_plus_diff 
X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. dmagda pushed a commit to branch IGNITE-7595 in repository https://gitbox.apache.org/repos/asf/ignite.git The following commit(s) were added to refs/heads/IGNITE-7595 by this push: new 17c2e1b Donated the Performance and Troubleshooting Guide from the GridGain to Ignite docs 17c2e1b is described below commit 17c2e1bdfca0b327aac1a2d0adf973ea17505ac8 Author: Denis Magda AuthorDate: Thu Oct 1 23:33:48 2020 -0700 Donated the Performance and Troubleshooting Guide from the GridGain to Ignite docs --- docs/_data/toc.yaml | 18 +- .../apache/ignite/snippets/CustomThreadPool.java | 53 +++ .../org/apache/ignite/snippets/FailureHandler.java | 39 ++ docs/_docs/code-snippets/xml/thread-pool.xml | 32 ++ .../general-perf-tips.adoc | 35 ++ .../handling-exceptions.adoc | 234 ++++++++++ docs/_docs/perf-and-troubleshooting/index.adoc | 5 + .../perf-and-troubleshooting/memory-tuning.adoc | 171 +++++++ .../persistence-tuning.adoc | 255 ++++++++++ .../_docs/perf-and-troubleshooting/sql-tuning.adoc | 511 +++++++++++++++++++++ .../thread-pools-tuning.adoc | 103 +++++ .../perf-and-troubleshooting/troubleshooting.adoc | 150 ++++++ docs/_docs/thread-pools.adoc | 136 ------ 13 files changed, 1604 insertions(+), 138 deletions(-) diff --git a/docs/_data/toc.yaml b/docs/_data/toc.yaml index e6db8e7..d8b1279 100644 --- a/docs/_data/toc.yaml +++ b/docs/_data/toc.yaml @@ -518,7 +518,21 @@ url: sql-reference/system-functions - title: Data Types url: sql-reference/data-types -- title: Thread Pools - url: thread-pools - title: Resources Injection url: resources-injection +- title: Performance and Troubleshooting + items: + - title: General Performance Tips + url: /perf-and-troubleshooting/general-perf-tips + - title: Memory and JVM Tuning + url: /perf-and-troubleshooting/memory-tuning + - title: Persistence Tuning + url: /perf-and-troubleshooting/persistence-tuning + - 
title: SQL Tuning + url: /perf-and-troubleshooting/sql-tuning + - title: Thread Pools Tuning + url: /perf-and-troubleshooting/thread-pools-tuning + - title: Troubleshooting and Debugging + url: /perf-and-troubleshooting/troubleshooting + - title: Handling Exceptions + url: /perf-and-troubleshooting/handling-exceptions diff --git a/docs/_docs/code-snippets/java/src/main/java/org/apache/ignite/snippets/CustomThreadPool.java b/docs/_docs/code-snippets/java/src/main/java/org/apache/ignite/snippets/CustomThreadPool.java new file mode 100644 index 0000000..69a9e24 --- /dev/null +++ b/docs/_docs/code-snippets/java/src/main/java/org/apache/ignite/snippets/CustomThreadPool.java @@ -0,0 +1,53 @@ +package org.apache.ignite.snippets; + +import org.apache.ignite.Ignite; +import org.apache.ignite.Ignition; +import org.apache.ignite.configuration.ExecutorConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.lang.IgniteRunnable; +import org.apache.ignite.resources.IgniteInstanceResource; + +public class CustomThreadPool { + + void customPool() { + + // tag::pool-config[] + IgniteConfiguration cfg = new IgniteConfiguration(); + + cfg.setExecutorConfiguration(new ExecutorConfiguration("myPool").setSize(16)); + // end::pool-config[] + + Ignite ignite = Ignition.start(cfg); + + ignite.compute().run(new OuterRunnable()); + + } + + // tag::inner-runnable[] + public class InnerRunnable implements IgniteRunnable { + @Override + public void run() { + System.out.println("Hello from inner runnable!"); + } + } + // end::inner-runnable[] + + // tag::outer-runnable[] + public class OuterRunnable implements IgniteRunnable { + @IgniteInstanceResource + private Ignite ignite; + + @Override + public void run() { + // Synchronously execute InnerRunnable in a custom executor. 
+ ignite.compute().withExecutor("myPool").run(new InnerRunnable()); + System.out.println("outer runnable is executed"); + } + } + // end::outer-runnable[] + + public static void main(String[] args) { + CustomThreadPool ctp = new CustomThreadPool(); + ctp.customPool(); + } +} diff --git a/docs/_docs/code-snippets/java/src/main/java/org/apache/ignite/snippets/FailureHandler.java b/docs/_docs/code-snippets/java/src/main/java/org/apache/ignite/snippets/FailureHandler.java new file mode 100644 index 0000000..74134cd --- /dev/null +++ b/docs/_docs/code-snippets/java/src/main/java/org/apache/ignite/snippets/FailureHandler.java @@ -0,0 +1,39 @@ +package org.apache.ignite.snippets; + +import java.util.Collections; + +import org.apache.ignite.Ignite; +import org.apache.ignite.Ignition; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.failure.StopNodeFailureHandler; + +public class FailureHandler { + + void configure() { + // tag::configure-handler[] + IgniteConfiguration cfg = new IgniteConfiguration(); + cfg.setFailureHandler(new StopNodeFailureHandler()); + Ignite ignite = Ignition.start(cfg); + // end::configure-handler[] + ignite.close(); + } + + void failureTypes() { + // tag::failure-types[] + StopNodeFailureHandler failureHandler = new StopNodeFailureHandler(); + failureHandler.setIgnoredFailureTypes(Collections.EMPTY_SET); + + IgniteConfiguration cfg = new IgniteConfiguration().setFailureHandler(failureHandler); + + Ignite ignite = Ignition.start(cfg); + // end::failure-types[] + + ignite.close(); + } + + public static void main(String[] args) { + FailureHandler fh = new FailureHandler(); + fh.configure(); + fh.failureTypes(); + } +} diff --git a/docs/_docs/code-snippets/xml/thread-pool.xml b/docs/_docs/code-snippets/xml/thread-pool.xml new file mode 100644 index 0000000..6fda178 --- /dev/null +++ b/docs/_docs/code-snippets/xml/thread-pool.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + 127.0.0.1:47500..47509 + + 
+ + + + + + + + \ No newline at end of file diff --git a/docs/_docs/perf-and-troubleshooting/general-perf-tips.adoc b/docs/_docs/perf-and-troubleshooting/general-perf-tips.adoc new file mode 100644 index 0000000..fe8f4a0 --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/general-perf-tips.adoc @@ -0,0 +1,35 @@ += General Performance Tips + +Ignite, like other distributed storages and platforms, requires certain optimization techniques. Before you dive +into the more advanced techniques described in this and other articles, consider the following basic checklist: + +* Ignite is designed and optimized for distributed computing scenarios. Deploy and benchmark a multi-node cluster +rather than a single-node one. + +* Ignite can scale horizontally and vertically equally well. +Thus, consider allocating all the CPU and RAM resources available on a local machine to an Ignite node. +A single node per physical machine is a recommended configuration. + +* In cases when Ignite is deployed in a virtual or cloud environment, it's ideal (but not strictly required) to +pin an Ignite node to a single host. This provides two benefits: + +** Avoids the "noisy neighbor" problem where Ignite VM would compete for the host resources with other applications. +This might cause performance spikes on your Ignite cluster. +** Ensures high-availability. If a host goes down and you have two or more Ignite server node VMs pinned to it, then it can lead to data loss. + +* If resources allow, store the entire data set in RAM. Even though Ignite can keep and work with on-disk data, +its architecture is memory-first. In other words, _the more data you cache in RAM the faster the performance_. +link:perf-and-troubleshooting/memory-tuning[Configure and tune] memory appropriately. + +* It might seem counter to the bullet point above but it's not enough just to put data in RAM and expect an +order of magnitude performance improvements. Be ready to adjust your data model and existing applications if any. 
+Use the link:data-modeling/affinity-collocation[affinity colocation] concept during the data +modelling phase for proper data distribution. For instance, if your data is properly colocated, you can run SQL +queries with JOINs at massive scale and expect significant performance benefits. + +* If Native persistence is used, then follow these link:perf-and-troubleshooting/persistence-tuning[persistence optimization techniques]. + +* If you are going to run SQL with Ignite, then get to know link:perf-and-troubleshooting/sql-tuning[SQL-related optimizations]. + +* Adjust link:data-rebalancing[data rebalancing settings] to ensure that rebalancing completes faster when your cluster topology changes. + diff --git a/docs/_docs/perf-and-troubleshooting/handling-exceptions.adoc b/docs/_docs/perf-and-troubleshooting/handling-exceptions.adoc new file mode 100644 index 0000000..834388c --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/handling-exceptions.adoc @@ -0,0 +1,234 @@ += Handling Exceptions + +This section outlines basic exceptions that can be generated by Ignite, and explains how to set +up and use the critical failures handler. + +== Handling Ignite Exceptions + +Exceptions supported by the Ignite API and actions you can take related to these exceptions are described below. +Please see the Javadoc _throws_ clause for checked exceptions. + +[cols="25%,35%,30%,10%", width="100%"] +|======================================================================= +|Exception |Description |Action |Runtime exception + +| `CacheInvalidStateException` +| Thrown when you try to perform an operation on a cache in which some partitions have been lost. Depending on the partition +loss policy configured for the cache, this exception is thrown either on read and/or write operations. +See link:partition-loss-policy[Partition Loss Policy] for details. +| Reset lost partitions. You may want to restore the data by returning the nodes that caused the partition loss to the cluster. 
+| Yes + +|`IgniteException` +|Indicates an error condition in the cluster. +|Operation failed. Exit from the method. +|Yes + +|`IgniteClientDisconnectedException` +|Thrown by the Ignite API when a client node gets disconnected from cluster. Thrown from Cache operations, compute API, and data structures. +|Wait and use retry logic. +|Yes +|`IgniteAuthenticationException` +|Thrown when there is either a node authentication failure or security authentication failure. +|Operation failed. Exit from the method. +|No +|`IgniteClientException` +|Can be thrown from Cache operations. +|Check exception message for the action to be taken. +|Yes +|`IgniteDeploymentException` +|Thrown when the Ignite API fails to deploy a job or task on a node. Thrown from the Compute API. +|Operation failed. Exit from the method. +|Yes +|`IgniteInterruptedException` +|Used to wrap the standard `InterruptedException` into `IgniteException`. +|Retry after clearing the interrupted flag. +|Yes +|`IgniteSpiException` +|Thrown by various SPI (`CollisionSpi`, `LoadBalancingSpi`, `TcpDiscoveryIpFinder`, `FailoverSpi`, `UriDeploymentSpi`, etc.) +|Operation failed. Exit from the method. +|Yes +|`IgniteSQLException` +|Thrown when there is a SQL query processing error. This exception also provides query specific error codes. +|Operation failed. Exit from the method. +|Yes +|`IgniteAccessControlException` +|Thrown when there is an authentication / authorization failure. +|Operation failed. Exit from the method. +|No +|`IgniteCacheRestartingException` +|Thrown from Ignite cache API if a cache is restarting. +|Wait and use retry logic. +|Yes +|`IgniteFutureTimeoutException` +|Thrown when a future computation is timed out. +|Either increase timeout limit or exit from the method. +|Yes +|`IgniteFutureCancelledException` +|Thrown when a future computation cannot be retrieved because it was cancelled. +|Use retry logic. 
+|Yes +|`IgniteIllegalStateException` +|Indicates that the Ignite instance is in an invalid state for the requested operation. +|Operation failed. Exit from the method. +|Yes +|`IgniteNeedReconnectException` +|Indicates that a node should try to reconnect to the cluster. +|Use retry logic. +|No +|`IgniteDataIntegrityViolationException` +|Thrown if a data integrity violation is found. +|Operation failed. Exit from the method. +|Yes +|`IgniteOutOfMemoryException` +|Thrown when the system does not have enough memory to process Ignite operations. Thrown from Cache operations. +|Operation failed. Exit from the method. +|Yes +|`IgniteTxOptimisticCheckedException` +|Thrown when a transaction fails optimistically. +|Use retry logic. +|No +|`IgniteTxRollbackCheckedException` +|Thrown when a transaction has been automatically rolled back. +|Use retry logic. +|No +|`IgniteTxTimeoutCheckedException` +|Thrown when a transaction times out. +|Use retry logic. +|No +|`ClusterTopologyException` +|Indicates an error with the cluster topology (e.g. crashed node, etc.). Thrown from Compute and Events API +|Wait on future and use retry logic. +|Yes +|======================================================================= + +== Critical Failures Handling + +Ignite is a robust and fault tolerant system. But in the real world, some unpredictable issues and problems arise +that can affect the state of both an individual node as well as the whole cluster. Such issues can be detected at +runtime and handled accordingly using a preconfigured critical failure handler. + +=== Critical Failures + +The following failures are treated as critical: + +* System critical errors (e.g. `OutOfMemoryError`). + +* Unintentional system worker termination (e.g. due to an unhandled exception). + +* System workers hanging. + +* Cluster nodes segmentation. + +A system critical error is an error which leads to the system's inoperability. 
For example: + +* File I/O errors - usually `IOException` is thrown by file read/write operations. It's possible when Ignite +native persistence is enabled (e.g., in cases when no space is left or on a device error), and also for in-memory +mode because Ignite uses disk storage for keeping some metadata (e.g., in cases when the file descriptors limit is +exceeded or file access is prohibited). + +* Out of memory error - when the Ignite memory management system fails to allocate more space +(`IgniteOutOfMemoryException`). + +* Out of memory error - when a cluster node runs out of Java heap (`OutOfMemoryError`). + +=== Failures Handling + +When Ignite detects a critical failure, it handles the failure according to a preconfigured failure handler. +The failure handler can be configured as follows: + +:javaFile: code-snippets/java/src/main/java/org/apache/ignite/snippets/FailureHandler.java + +[tabs] +-- +tab:XML[] +[source,xml] +---- + + + + + +---- +tab:Java[] +[source,java] +---- +include::{javaFile}[tag=configure-handler,indent=0] +---- +-- + +Ignite supports the following failure handlers: + +[width=100%,cols="30%,70%"] +|======================================================================= +|Class |Description + +|`NoOpFailureHandler` +|Ignores any failures. Useful for testing and debugging. +|`RestartProcessFailureHandler` +|A specific implementation that can be used only with `ignite.sh\|bat`. The process must be terminated by using the `Ignition.restart(true)` method. +|`StopNodeFailureHandler` +|Stops the node in case of critical errors by calling the `Ignition.stop(true)` or `Ignition.stop(nodeName, true)` methods. +|`StopNodeOrHaltFailureHandler` +|This is the default handler, which tries to stop a node. If the node can't be stopped, then the handler terminates the JVM process. 
+ +|======================================================================= + +=== Critical Workers Health Check + +Ignite has a number of internal workers that are essential for the cluster to function correctly. If one of them is +terminated, the node can become inoperative. + +The following system workers are considered mission critical: + +* Discovery worker - discovery events handling. +* TCP communication worker - peer-to-peer communication between nodes. +* Exchange worker - partition map exchange. +* Workers of the system's striped pool. +* Data Streamer striped pool workers. +* Timeout worker - timeouts handling. +* Checkpoint thread - check-pointing in Ignite persistence. +* WAL workers - write-ahead logging, segments archiving, and compression. +* Expiration worker - TTL based expiration. +* NIO workers - base networking. + +Ignite has an internal mechanism for verifying that critical workers are operational. +Each worker is regularly checked to confirm that it is alive and updating its heartbeat timestamp. +If a worker is not alive and updating, the worker is regarded as blocked and Ignite will print a message to the log file. +You can set the period of inactivity via the `IgniteConfiguration.systemWorkerBlockedTimeout` property. + +Even though Ignite considers an unresponsive system worker to be a critical error, it doesn't handle this situation automatically, +other than printing out a message to the log file. 
+If you want to enable a particular failure handler for unresponsive system workers of all the types, clear the +`ignoredFailureTypes` property of the handler as shown below: + +[tabs] +-- +tab:XML[] +[source,xml] +---- + + + + + + + + + + + + + + + + + +---- +tab:Java[] +[source,java] +---- +include::{javaFile}[tag=failure-types,indent=0] +---- +-- + diff --git a/docs/_docs/perf-and-troubleshooting/index.adoc b/docs/_docs/perf-and-troubleshooting/index.adoc new file mode 100644 index 0000000..e7083e1 --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/index.adoc @@ -0,0 +1,5 @@ +--- +layout: toc +--- + += Performance and Troubleshooting Guide diff --git a/docs/_docs/perf-and-troubleshooting/memory-tuning.adoc b/docs/_docs/perf-and-troubleshooting/memory-tuning.adoc new file mode 100644 index 0000000..3bd3993 --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/memory-tuning.adoc @@ -0,0 +1,171 @@ += Memory and JVM Tuning + +This article provides best practices for memory tuning that are relevant for deployments with and without native persistence or an external storage. +Even though Ignite stores data and indexes off the Java heap, Java heap is still used to store objects generated by +queries and operations executed by your applications. +Thus, certain recommendations should be considered for JVM and garbage collection (GC) related optimizations. + +[NOTE] +==== +[discrete] +Refer to link:perf-and-troubleshooting/persistence-tuning[persistence] tuning article for disk-related +optimization practices. +==== + +== Tune Swappiness Setting + +An operating system starts swapping pages from RAM to disk when overall RAM usage hits a certain threshold. +Swapping can impact Ignite cluster performance. +You can adjust the operating system's setting to prevent this from happening. 
+For Unix, the best option is to either decrease the `vm.swappiness` parameter to `10`, or set it to `0` if native persistence is enabled: + +[source,shell] +---- +sysctl -w vm.swappiness=0 +---- + +The value of this setting can prolong GC pauses as well. For instance, if your GC logs show `low user time, high +system time, long GC pause` records, it might be caused by Java heap pages being swapped in and out. To +address this, use the `swappiness` settings above. + +== Share RAM with OS and Apps + +An individual machine's RAM is shared among the operating system, Ignite, and other applications. +As a general recommendation, if an Ignite cluster is deployed in pure in-memory mode (native +persistence is disabled), then you should not allocate more than 90% of RAM capacity to Ignite nodes. + +On the other hand, if native persistence is used, then the OS requires extra RAM for its page cache in order to optimally sync up data to disk. +If the page cache is not disabled, then you should not give more than 70% of the server's RAM to Ignite. + +Refer to link:memory-configuration/data-regions[memory configuration] for configuration examples. + +In addition to that, because using native persistence might cause high page cache utilization, the `kswapd` daemon might not keep up with page reclamation, which is used by the page cache in the background. +As a result, this can cause high latencies due to direct page reclamation and lead to long GC pauses. + +To work around the effects caused by page memory reclamation on Linux, add extra bytes between `wmark_min` and `wmark_low` with `/proc/sys/vm/extra_free_kbytes`: + +[source,shell] +---- +sysctl -w vm.extra_free_kbytes=1240000 +---- + +Refer to link:https://events.static.linuxfound.org/sites/events/files/lcjp13_moriya.pdf[this resource, window=_blank] +for more insight into the relationship between page cache settings, high latencies, and long GC pauses. 
+ +== Java Heap and GC Tuning + +Even though Ignite keeps data in its own off-heap memory regions invisible to Java garbage collectors, Java +Heap is still used for objects generated by your application workloads. +For instance, whenever you run SQL queries against an Ignite cluster, the queries will access data and indexes stored in +the off-heap memory while the result sets of such queries will be kept in Java Heap until your application reads the result sets. +Thus, depending on the throughput and type of operations, Java Heap can still be utilized heavily and this might require +JVM and GC related tuning for your workloads. + +We've included some common recommendations and best practices below. +Feel free to start with them and make further adjustments as necessary, depending on the specifics of your applications. + +[NOTE] +==== +[discrete] +Refer to the link:perf-and-troubleshooting/troubleshooting#debugging-gc-issues[GC debugging techniques] section for best +practices on GC logs and heap dumps collection. +==== + +=== Generic GC Settings + +Below are sets of example JVM configurations for applications that can utilize Java Heap on server nodes heavily, thus +triggering long — or frequent, short — stop-the-world GC pauses. + +For JDK 1.8+ deployments you should use the G1 garbage collector. +The settings below are a good starting point if 10GB heap is more than enough for your server nodes: + +[source,shell] +---- +-server +-Xms10g +-Xmx10g +-XX:+AlwaysPreTouch +-XX:+UseG1GC +-XX:+ScavengeBeforeFullGC +-XX:+DisableExplicitGC +---- + +If G1 does not work for you, consider using the CMS collector and starting with the following settings. 
+Note that 10GB heap is used as an example and a smaller heap can be enough for your use case: + +[source,shell] +---- +-server +-Xms10g +-Xmx10g +-XX:+AlwaysPreTouch +-XX:+UseParNewGC +-XX:+UseConcMarkSweepGC +-XX:+CMSClassUnloadingEnabled +-XX:+CMSPermGenSweepingEnabled +-XX:+ScavengeBeforeFullGC +-XX:+CMSScavengeBeforeRemark +-XX:+DisableExplicitGC +---- + +[NOTE] +==== +//TODO: Is this still valid? What does it do? +If you use link:persistence/native-persistence[Ignite native persistence], we recommend that you set the +`MaxDirectMemorySize` JVM parameter to `walSegmentSize * 4`. +With the default WAL settings, this value is equal to 256MB. +==== + +=== Advanced Memory Tuning + +In Linux and Unix environments, it's possible that an application can face long GC pauses or lower performance due to +I/O or memory starvation due to kernel specific settings. +This section provides some guidelines on how to modify kernel settings in order to overcome long GC pauses. + +[WARNING] +==== +[discrete] +All the shell commands given below were tested on RedHat 7. +They may differ for your Linux distribution. +Before changing the kernel settings, make sure to check the system statistics/logs to confirm that you really have a problem. +Consult your IT department before making changes at the Linux kernel level in production. +==== + +If GC logs show `low user time, high system time, long GC pause` then most likely memory constraints are triggering swapping or scanning of a free memory space. + +* Check and adjust the link:perf-and-troubleshooting/memory-tuning#tune-swappiness-setting[swappiness settings]. +* Add `-XX:+AlwaysPreTouch` to JVM settings on startup. +* Disable NUMA zone-reclaim optimization. ++ +[source,shell] +---- +sysctl -w vm.zone_reclaim_mode=0 +---- + +* Turn off Transparent Huge Pages if RedHat distribution is used. 
++ +[source,shell] +---- +echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled +echo never > /sys/kernel/mm/redhat_transparent_hugepage/defrag +---- + +=== Advanced I/O Tuning + +If GC logs show `low user time, low system time, long GC pause` then GC threads might be spending too much time in the kernel space being blocked by various I/O activities. +For instance, this can be caused by journal commits, gzip, or log roll over procedures. + +As a solution, you can try changing the page flushing interval from the default 30 seconds to 5 seconds: + +[source,shell] +---- +sysctl -w vm.dirty_writeback_centisecs=500 +sysctl -w vm.dirty_expire_centisecs=500 +---- + +[NOTE] +==== +[discrete] +Refer to the link:perf-and-troubleshooting/persistence-tuning[persistence tuning] section for the optimizations related to disk. +Those optimizations can have a positive impact on GC. +==== diff --git a/docs/_docs/perf-and-troubleshooting/persistence-tuning.adoc b/docs/_docs/perf-and-troubleshooting/persistence-tuning.adoc new file mode 100644 index 0000000..6dc80fd --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/persistence-tuning.adoc @@ -0,0 +1,255 @@ += Persistence Tuning +:javaFile: code-snippets/java/src/main/java/org/apache/ignite/snippets/PersistenceTuning.java +:xmlFile: code-snippets/xml/persistence-tuning.xml +:dotnetFile: code-snippets/dotnet/PersistenceTuning.cs + +This article summarizes best practices for Ignite native persistence tuning. +If you are using an external (3rd party) storage for persistence needs, please refer to performance guides from the 3rd party vendor. + +== Adjusting Page Size + +The `DataStorageConfiguration.pageSize` parameter should be no less than the lower of: the page size of your storage media (SSD, Flash, HDD, etc.) and the cache page size of your operating system. +The default value is 4KB. 
+ +The operating system's cache page size can be easily checked using +link:https://unix.stackexchange.com/questions/128213/how-is-page-size-determined-in-virtual-address-space[system tools and parameters, window=_blank]. + +The page size of the storage device such as SSD is usually noted in the device specification. If the manufacturer does +not disclose this information, try to run SSD benchmarks to figure out the number. +Many manufacturers have to adapt their drivers for 4 KB random-write workloads because a variety of standard +benchmarks use 4 KB by default. +link:https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ssd-server-storage-applications-paper.pdf[This white paper,window=_blank] +from Intel confirms that 4 KB should be enough. + +Once you pick the most optimal page size, apply it in your cluster configuration: + +//// +TODO for .NET and other languages. +//// + +[tabs] +-- +tab:XML[] +[source,xml] +---- +include::{xmlFile}[tags=!*;ignite-config;ds;page-size,indent=0] +---- +tab:Java[] +[source,java] +---- +include::{javaFile}[tag=page-size,indent=0] +---- +tab:C#/.NET[] +[source,csharp] +---- +include::{dotnetFile}[tag=page-size,indent=0] +---- +tab:C++[unsupported] +-- + +== Keep WALs Separately + +Consider using separate drives for data files and link:persistence/native-persistence#write-ahead-log[Write-Ahead-Logging (WAL)]. +Ignite actively writes to both the data and WAL files. 
+ +The example below shows how to configure separate paths for the data storage, WAL, and WAL archive: + +[tabs] +-- +tab:XML[] +[source,xml] +---- +include::{xmlFile}[tags=!*;ignite-config;ds;paths,indent=0] +---- +tab:Java[] +[source,java] +---- +include::{javaFile}[tag=separate-wal,indent=0] +---- +tab:C#/.NET[] +[source,csharp] +---- +include::{dotnetFile}[tag=separate-wal,indent=0] +---- +tab:C++[unsupported] +-- + +== Increasing WAL Segment Size + +The default WAL segment size (64 MB) may be inefficient in high load scenarios because it causes WAL to switch between segments too frequently and switching/rotation is a costly operation. Setting the segment size to a higher value (up to 2 GB) may help reduce the number of switching operations. However, the tradeoff is that this will increase the overall volume of the write-ahead log. + +See link:persistence/native-persistence#changing-wal-segment-size[Changing WAL Segment Size] for details. + +== Changing WAL Mode + +Consider other WAL modes as alternatives to the default mode. Each mode provides different degrees of reliability in +case of node failure and that degree is inversely proportional to speed, i.e. the more reliable the WAL mode, the +slower it is. Therefore, if your use case does not require high reliability, you can switch to a less reliable mode. + +See link:persistence/native-persistence#wal-modes[WAL Modes] for more details. + +== Disabling WAL + +//TODO: when should bhis be done? +There are situations where link:persistence/native-persistence#disabling-wal[disabling the WAL] can help improve performance. + +== Pages Writes Throttling + +Ignite periodically starts the link:persistence/native-persistence#checkpointing[checkpointing process] that syncs +dirty pages from memory to disk. A dirty page is a page that was updated in RAM but was not written to a respective +partition file (an update was just appended to the WAL). 
This process happens in the background without affecting the application's logic. + +However, if a dirty page, scheduled for checkpointing, is updated before being written to disk, its previous state is +copied to a special region called a checkpointing buffer. +If the buffer overflows, Ignite will stop processing all updates until the checkpointing is over. +As a result, write performance can drop to zero as shown in this diagram, until the checkpointing cycle is completed: + +image::images/checkpointing-chainsaw.png[Checkpointing Chainsaw] + +The same situation occurs if the dirty pages threshold is reached again while the checkpointing is in progress. +This will force Ignite to schedule one more checkpointing execution and to halt all the update operations until the first checkpointing cycle is over. + +Both situations usually arise when either a disk device is slow or the update rate is too intensive. +To mitigate and prevent these performance drops, consider enabling the pages write throttling algorithm. +The algorithm brings the performance of update operations down to the speed of the disk device whenever the checkpointing buffer fills up too fast or the percentage of dirty pages soars rapidly. + +[NOTE] +==== +[discrete] +=== Pages Write Throttling in a Nutshell + +Refer to the link:https://cwiki.apache.org/confluence/display/IGNITE/Ignite+Persistent+Store+-+under+the+hood#IgnitePersistentStore-underthehood-PagesWriteThrottling[Ignite wiki page, window=_blank] +maintained by Apache Ignite persistence experts to get more details about throttling and its causes. 
+==== + +The example below shows how to enable write throttling: + +[tabs] +-- +tab:XML[] +[source,xml] +---- +include::{xmlFile}[tags=!*;ignite-config;ds;page-write-throttling,indent=0] +---- +tab:Java[] +[source,java] +---- +include::{javaFile}[tag=throttling,indent=0] +---- +tab:C#/.NET[] +[source,csharp] +---- +include::{dotnetFile}[tag=throttling,indent=0] +---- +tab:C++[unsupported] +-- + +== Adjusting Checkpointing Buffer Size + +The size of the checkpointing buffer, explained in the previous section, is one of the checkpointing process triggers. + +The default buffer size is calculated as a function of the link:memory-configuration/data-regions[data region] size: + +[width=100%,cols="1,2",options="header"] +|======================================================================= +| Data Region Size |Default Checkpointing Buffer Size + +|< 1 GB | MIN (256 MB, Data_Region_Size) + +|between 1 GB and 8 GB | Data_Region_Size / 4 + +|> 8 GB | 2 GB + +|======================================================================= + +The default buffer size can be suboptimal for write-intensive workloads because the page write +throttling algorithm will slow down your writes whenever the size reaches the critical mark. To keep write +performance at the desired pace while the checkpointing is in progress, consider increasing +`DataRegionConfiguration.checkpointPageBufferSize` and enabling write throttling to prevent performance​ drops: + +[tabs] +-- +tab:XML[] +[source,xml] +---- +include::{xmlFile}[tags=!*;ignite-config;ds;page-write-throttling;data-region,indent=0] +---- +tab:Java[] +[source,java] +---- +include::{javaFile}[tag=checkpointing-buffer-size,indent=0] +---- +tab:C#/.NET[] +[source,csharp] +---- +include::{dotnetFile}[tag=checkpointing-buffer-size,indent=0] +---- +tab:C++[unsupported] +-- + +In the example above, the checkpointing buffer size of the default region is set to 1 GB. 
+ +//// +TODO: describe when checkpointing is triggered +[NOTE] +==== +[discrete] +=== When is the Checkpointing Process Triggered? + +Checkpointing is started if either the dirty pages count goes beyond the `totalPages * 2 / 3` value or +`DataRegionConfiguration.checkpointPageBufferSize` is reached. However, if page write throttling is used, then +`DataRegionConfiguration.checkpointPageBufferSize` is never encountered because it cannot be reached due to the way the algorithm works. +==== +//// + +== Enabling Direct I/O +//TODO: why is this not enabled by default? +Usually, whenever an application reads data from disk, the OS gets the data and puts it in a file buffer cache first. +Similarly, for every write operation, the OS first writes the data in the cache and transfers it to disk later. To +eliminate this process, you can enable Direct I/O in which case the data is read and written directly from/to the +disk, bypassing the file buffer cache. + +The Direct I/O module in Ignite is used to speed up the checkpointing process, which writes dirty pages from RAM to disk. +Consider using the Direct I/O plugin for write-intensive workloads. + +[NOTE] +==== +[discrete] +=== Direct I/O and WALs + +Note that Direct I/O cannot be enabled specifically for WAL files. However, enabling the Direct I/O module provides +a slight benefit regarding the WAL files as well: the WAL data will not be stored in the OS buffer cache for too long; +it will be flushed (depending on the WAL mode) at the next page cache scan and removed from the page cache. +==== + +You can enable Direct I/O, move the `{ignite_dir}/libs/optional/ignite-direct-io` folder to the upper level `libs/optional/ignite-direct-io` +folder in your Ignite distribution or as a Maven dependency as described link:setup#enabling-modules[here]. + +You can use the `IGNITE_DIRECT_IO_ENABLED` system property to enable or disable the plugin at runtime. 
+ +Get more details from the link:https://cwiki.apache.org/confluence/display/IGNITE/Ignite+Persistent+Store+-+under+the+hood#IgnitePersistentStore-underthehood-DirectI/O[Ignite Direct I/O Wiki section, window=_blank]. + +== Purchase Production-Level SSDs + +Note that the performance of Ignite Native Persistence may drop after several hours of intensive write load due to +the nature of how +link:http://codecapsule.com/2014/02/12/coding-for-ssds-part-2-architecture-of-an-ssd-and-benchmarking[SSDs are designed and operate, window=_blank]. +Consider buying fast production-level SSDs to keep the performance high or switch to non-volatile memory devices like +Intel Optane Persistent Memory. + +== SSD Over-provisioning + +Performance of random writes on a 50% filled disk is much better than on a 90% filled disk because of the SSDs over-provisioning +(see link:https://www.seagate.com/tech-insights/ssd-over-provisioning-benefits-master-ti[https://www.seagate.com/tech-insights/ssd-over-provisioning-benefits-master-ti, window=_blank]). + +Consider buying SSDs with higher over-provisioning rates and make sure the manufacturer provides the tools to adjust it. + +[NOTE] +==== +[discrete] +=== Intel 3D XPoint + +Consider using 3D XPoint drives instead of regular SSDs to avoid the bottlenecks caused by a low over-provisioning +setting and constant garbage collection at the SSD level. +Read more link:http://dmagda.blogspot.com/2017/10/3d-xpoint-outperforms-ssds-verified-on.html[here, window=_blank]. +==== diff --git a/docs/_docs/perf-and-troubleshooting/sql-tuning.adoc b/docs/_docs/perf-and-troubleshooting/sql-tuning.adoc new file mode 100644 index 0000000..3521ee1 --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/sql-tuning.adoc @@ -0,0 +1,511 @@ += SQL Performance Tuning + +This article outlines basic and advanced optimization techniques for Ignite SQL queries. Some of the sections are also useful for debugging and troubleshooting. 
+ +== Basic Considerations: Ignite vs RDBMS + +Ignite is frequently compared to relational databases for their SQL capabilities with an expectation that existing SQL +queries, created for an RDBMS, will work out of the box and perform faster in Ignite without any +changes. Usually, such an assumption is based on the fact that Ignite stores and processes data in-memory. +However, it's not enough just to put data in RAM and expect an order of magnitude increase in performance. Generally, +extra tuning is required. Below you can see a standard checklist of +best practices to consider before you benchmark Ignite against an RDBMS or do any performance testing: + +* Ignite is optimized for _multi-nodes_ deployments with RAM as a primary storage. Don't +try to compare a single-node Ignite cluster to a relational database. You should deploy a multi-node Ignite cluster with the whole copy of data in RAM. + +* Be ready to adjust your data model and existing SQL queries. +Use the link:data-modeling/affinity-collocation[affinity colocation] concept during the data +modelling phase for proper data distribution. Remember, it's not enough just to put data in RAM. If your data is properly colocated, you can run SQL queries with JOINs at massive scale and expect significant performance benefits. + +* Define secondary indexes and use other standard, and Ignite-specific, tuning techniques described below. + +* Keep in mind that relational databases leverage local caching techniques and, depending on the total data size, an +RDBMS can complete _some queries_ even faster than Ignite even in a multi-node configuration. +If your data set is around 10-100GB and an RDBMS has enough RAM for caching data locally than it, for instance, can +outperform a multi-node Ignite cluster because the latter will be utilizing the network. Store much more data in Ignite to see the difference. 
+ + +== Using the EXPLAIN Statement + +Ignite supports the `EXPLAIN` statement which could be used to read the execution plan of a query. +Use this command to analyse your queries for possible optimization. Note that the plan will contain multiple rows: the +last one will contain a query for the reducing side (usually your application), others are for map nodes (usually server nodes). +Read the link:SQL/sql-introduction#distributed-queries[Distributed Queries] section to learn how queries are executed in Ignite. + +[source,sql] +---- +EXPLAIN SELECT name FROM Person WHERE age = 26; +---- + +The execution plan is generated by H2 as described link:http://www.h2database.com/html/performance.html#explain_plan[here, window=_blank]. + +== OR Operator and Selectivity + +//*TODO*: is this still valid? + +If a query contains an `OR` operator, then indexes may not be used as expected depending on the complexity of the query. +For example, for the query `select name from Person where gender='M' and (age = 20 or age = 30)`, an index on the `gender` +field will be used instead of an index on the `age` field, although the latter is a more selective index. +As a workaround for this issue, you can rewrite the query with `UNION ALL` (notice that `UNION` without `ALL` will return +`DISTINCT` rows, which will change the query semantics and will further penalize your query performance): + +[source,sql] +---- +SELECT name FROM Person WHERE gender='M' and age = 20 +UNION ALL +SELECT name FROM Person WHERE gender='M' and age = 30 +---- + +== Avoid Having Too Many Columns + +Avoid having too many columns in the result set of a `SELECT` query. Due to limitations of the H2 query parser, queries +with 100+ columns may perform worse than expected. + +== Lazy Loading + +By default, Ignite attempts to load the whole result set to memory and send it back to the query initiator (which is +usually your application). 
This approach provides optimal performance for queries of small or medium result sets. +However, if the result set is too big to fit in the available memory, it can lead to prolonged GC pauses and even `OutOfMemoryError` exceptions. + +To minimize memory consumption, at the cost of a moderate performance hit, you can load and process the result sets +lazily by passing the `lazy` parameter to the JDBC and ODBC connection strings or use a similar method available for Java, .NET, and C++ APIs: + +[tabs] +-- + +tab:Java[] +[source,java] +---- +SqlFieldsQuery query = new SqlFieldsQuery("SELECT * FROM Person WHERE id > 10"); + +// Result set will be loaded lazily. +query.setLazy(true); +---- +tab:JDBC[] +[source,sql] +---- +jdbc:ignite:thin://192.168.0.15?lazy=true +---- +tab:C#/.NET[] +[source,csharp] +---- +var query = new SqlFieldsQuery("SELECT * FROM Person WHERE id > 10") +{ + // Result set will be loaded lazily. + Lazy = true +}; +---- +tab:C++[] +-- + +//// +*TODO* Add tabs for ODBC and other programming languages - C# and C++ +//// + +== Querying Colocated Data + +When Ignite executes a distributed query, it sends sub-queries to individual cluster nodes to fetch the data and groups +the results on the reducer node (usually your application). +If you know in advance that the data you are querying is link:data-modeling/affinity-collocation[colocated] +by the `GROUP BY` condition, you can use `SqlFieldsQuery.collocated = true` to tell the SQL engine to do the grouping on the remote nodes. +This will reduce network traffic between the nodes and query execution time. +When this flag is set to `true`, the query is executed on individual nodes first and the results are sent to the reducer node for final calculation. + +Consider the following example, in which we assume that the data is colocated by `department_id` (in other words, the +`department_id` field is configured as the affinity key). 
+ +[source,sql] +---- +SELECT SUM(salary) FROM Employee GROUP BY department_id +---- + +Because of the nature of the SUM operation, Ignite will sum the salaries across the elements stored on individual nodes, +and then send these sums to the reducer node where the final result will be calculated. +This operation is already distributed, and enabling the `collocated` flag will only slightly improve performance. + +Let's take a slightly different example: + +[source,sql] +---- +SELECT AVG(salary) FROM Employee GROUP BY department_id +---- + +In this example, Ignite has to fetch all (`salary`, `department_id`) pairs to the reducer node and calculate the results there. +However, if employees are colocated by the `department_id` field, i.e. employee data for the same department +is stored on the same node, setting `SqlFieldsQuery.collocated = true` will reduce query execution time because Ignite +will calculate the averages for each department on the individual nodes and send the results to the reducer node for final calculation. + + +== Enforcing Join Order + +When this flag is set, the query optimizer will not reorder tables in joins. +In other words, the order in which joins are applied during query execution will be the same as specified in the query. +Without this flag, the query optimizer can reorder joins to improve performance. +However, sometimes it might make an incorrect decision. +This flag helps to control and explicitly specify the order of joins instead of relying on the optimizer. + +Consider the following example: + +[source, sql] +---- +SELECT * FROM Person p +JOIN Company c ON p.company = c.name where p.name = 'John Doe' +AND p.age > 20 +AND p.id > 5000 +AND p.id < 100000 +AND c.name NOT LIKE 'O%'; +---- + +This query contains a join between two tables: `Person` and `Company`. +To get the best performance, we should understand which join will return the smallest result set. 
+The table with the smaller result set size should be given first in the join pair. +To get the size of each result set, let's test each part. + +.Q1: +[source, sql] +---- +SELECT count(*) +FROM Person p +where +p.name = 'John Doe' +AND p.age > 20 +AND p.id > 5000 +AND p.id < 100000; +---- + +.Q2: +[source, sql] +---- +SELECT count(*) +FROM Company c +where +c.name NOT LIKE 'O%'; +---- + +After running Q1 and Q2, we can get two different outcomes: + +Case 1: +[cols="1,1",opts="stretch,autowidth",stripes=none] +|=== +|Q1 | 30000 +|Q2 |100000 +|=== + +Q2 returns more entries than Q1. +In this case, we don't need to modify the original query, because smaller subset has already been located on the left side of the join. + +Case 2: +[cols="1,1",opts="stretch,autowidth",stripes=none] +|=== +|Q1 | 50000 +|Q2 |10000 +|=== + +Q1 returns more entries than Q2. So we need to change the initial query as follows: + +[source, sql] +---- +SELECT * +FROM Company c +JOIN Person p +ON p.company = c.name +where +p.name = 'John Doe' +AND p.age > 20 +AND p.id > 5000 +AND p.id < 100000 +AND c.name NOT LIKE 'O%'; +---- + +The force join order hint can be specified as follows: + +* link:SQL/JDBC/jdbc-driver#parameters[JDBC driver connection parameter] +* link:SQL/ODBC/connection-string-dsn#supported-arguments[ODBC driver connection attribute] +* If you use link:SQL/sql-api[SqlFieldsQuery] to execute SQL queries, you can set the enforce join order +hint by calling the `SqlFieldsQuery.setEnforceJoinOrder(true)` method. + + +== Increasing Index Inline Size + +Every entry in the index has a constant size which is calculated during index creation. This size is called _index inline size_. +Ideally this size should be enough to store full indexed entry in serialized form. +When values are not fully included in the index, Ignite may need to perform additional data page reads during index lookup, +which can impair performance if persistence is enabled. 
+ +//If a value type allows, Ignite includes indexed values in the index itself to optimize querying and data updates. + + +Here is how values are stored in the index: + +// the source code block below uses css-styles from the pygments library. If you change the highlighting library, you should change the syles as well. +[source,java,subs="quotes"] +---- +[tok-kt]#int# +0 1 5 +| tag | value | +[tok-k]#Total: 5 bytes# + +[tok-kt]#long# +0 1 9 +| tag | value | +[tok-k]#Total: 9 bytes# + +[tok-kt]#String# +0 1 3 N +| tag | size | UTF-8 value | +[tok-k]#Total: 3 + string length# + +[tok-kt]#POJO (BinaryObject)# +0 1 5 +| tag | BO hash | +[tok-k]#Total: 5# +---- + +For primitive data types (bool, byte, short, int, etc.), Ignite automatically calculates the index inline size so that the values are included in full. +For example, for `int` fields, the inline size is 5 (1 byte for the tag and 4 bytes for the value itself). For `long` fields, the inline size is 9 (1 byte for the tag + 8 bytes for the value). + +For binary objects, the index includes the hash of each object, which is enough to avoid collisions. The inline size is 5. + +For variable length data, indexes include only first several bytes of the value. +//As you can see, indexes on `Strings` (and other variable-length types) only store first several bytes of the value. +Therefore, when indexing fields with variable-length data, we recommend that you estimate the length of your field values and set the inline size to a value that includes most (about 95%) or all values. +For example, if you have a `String` field with 95% of the values containing 10 characters or fewer, you can set the inline size for the index on that field to 13. + +//For example, when you create a table with a single column primary key, Ignite will automatically create an index on the primary key. + +The inline sizes explained above apply to single field indexes. 
+However, when you define an index on a field in the value object or on a non-primary key column, Ignite creates a _composite index_ +by appending the primary key to the indexed value. +Therefore, when calculating the inline size for composite indexes, add up the inline size of the primary key. + +//To summarize, when creating indexes on a variable size data fields, choose the inline size to include most of the values that the field will hold. For other data types, Ignite will calculate the inline size automatically. + +Below is an example of index inline size calculation for a cache where both key and value are complex objects. + +[source, java] +---- +public class Key { + @QuerySqlField + private long id; + + @QuerySqlField + @AffinityKeyMapped + private long affinityKey; +} + +public class Value { + @QuerySqlField(index = true) + private long longField; + + @QuerySqlField(index = true) + private int intField; + + @QuerySqlField(index = true) + private String stringField; // we suppose that 95% of the values are 10 symbols +} +---- + +The following table summarizes the inline index sizes for the indexes defined in the example above. + +[cols="1,1,1,2",opts="stretch,header"] +|=== +|Index | Kind | Recommended Inline Size | Comment + +| (_key) +|Primary key index +| 5 +|Inlined hash of a binary object (5) + +|(affinityKey, _key) +|Affinity key index +|14 +|Inlined long (9) + binary object's hash (5) + +|(longField, _key) +|Secondary index +|14 +|Inlined long (9) + binary object's hash (5) + +|(intField, _key) +|Secondary index +|10 +|Inlined int (5) + binary object up to hash (5) + +|(stringField, _key) +|Secondary index +|18 +|Inlined string (13) + binary object's hash (5) (assuming that the string is {tilde}10 symbols) + +|=== +//_ + +//The inline size for the first two indexes is set via `CacheConfiguration.sqlIndexMaxInlineSize = 29` (because a single property is responsible for two indexes, we set it to the largest value). 
+//The inline size for the rest of the indexes is set when you define a corresponding index. +Note that you will only have to set the inline size for the index on `stringField`. For other indexes, Ignite will calculate the inline size automatically. + +Refer to the link:SQL/indexes#configuring-index-inline-size[Configuring Index Inline Size] section for the information on how to change the inline size. + +You can check the inline size of an existing index in the link:monitoring-metrics/system-views#indexes-view[INDEXES] system view. + +[WARNING] +==== +Note that since Ignite encodes strings to `UTF-8`, some characters use more than 1 byte. +==== + +== Query Parallelism + +By default, a SQL query is executed in a single thread on each participating Ignite node. This approach is optimal for +queries returning small result sets involving index search. For example: + +[source,sql] +---- +SELECT * FROM Person WHERE p.id = ?; +---- + +Certain queries might benefit from being executed in multiple threads. +This relates to queries with table scans and aggregations, which is often the case for HTAP and OLAP workloads. +For example: + +[source,sql] +---- +SELECT SUM(salary) FROM Person; +---- + +The number of threads created on a single node for query execution is configured per cache and by default equals 1. +You can change the value by setting the `CacheConfiguration.queryParallelism` parameter. +If you create SQL tables using the CREATE TABLE command, you can use a link:configuring-caches/configuration-overview#cache-templates[cache template] to set this parameter. + +If a query contains `JOINs`, then all the participating caches must have the same degree of parallelism. + +== Index Hints + +Index hints are useful in scenarios when you know that one index is more suitable for certain queries than another. +You can use them to instruct the query optimizer to choose a more efficient execution plan. 
+To do this, you can use the `USE INDEX(indexA,...,indexN)` statement as shown in the following example.
+==== + +== Skip Reducer on Update + +When Ignite executes a DML operation, it first fetches all the affected intermediate rows for analysis to the reducer +node (usually your application), and only then prepares batches of updated values that will be sent to remote nodes. + +This approach might affect performance and saturate the network if a DML operation has to move many entries. + +Use this flag as a hint for the SQL engine to do all intermediate rows analysis and updates “in-place” on the server nodes. +The hint is supported for JDBC and ODBC connections. + + +[tabs] +-- +tab:JDBC Connection String[] +[source,text] +---- +//jdbc connection string +jdbc:ignite:thin://192.168.0.15/skipReducerOnUpdate=true +---- +-- + + +//// +*TODO* Add tabs for ODBC and other programming languages - C# and C++ +//// + +== SQL On-heap Row Cache + +Ignite stores data and indexes in its own memory space outside of Java heap. This means that with every data +access, a part of the data will be copied from the off-heap space to Java heap, potentially deserialized, and kept in +the heap as long as your application or server node references it. + +The SQL on-heap row cache is intended to store hot rows (key-value objects) in Java heap, minimizing resources +spent for data copying and deserialization. Each cached row refers to an entry in the off-heap region and can be +invalidated when one of the following happens: + +* The master entry stored in the off-heap region is updated or removed. +* The data page that stores the master entry is evicted from RAM. 
+ +The on-heap row cache can be enabled for a specific cache/table (if you use CREATE TABLE to create SQL tables and caches, +then the parameter can be passed via a link:configuring-caches/configuration-overview#cache-templates[cache template]): + +//// +TODO Add tabs for ODBC/JDBC and other programming languages - Java C# and C++ +//// + +[source,xml] +---- +include::code-snippets/xml/sql-on-heap-cache.xml[tags=ignite-config;!discovery,indent=0] +---- + +//// +*TODO* Add tabs for ODBC/JDBC and other programming languages - Java C# and C++ +//// + +If the row cache is enabled, you might be able to trade RAM for performance. You might get up to a 2x performance increase for some SQL queries and use cases by allocating more RAM for rows caching purposes. + +[WARNING] +==== +[discrete] +=== SQL On-Heap Row Cache Size + +Presently, the cache is unlimited and can occupy as much RAM as allocated to your memory data regions. Make sure to: + +* Set the JVM max heap size equal to the total size of all the data regions that store caches for which this on-heap row cache is enabled. + +* link:perf-and-troubleshooting/memory-tuning#java-heap-and-gc-tuning[Tune] JVM garbage collection accordingly. +==== + +== Using TIMESTAMP instead of DATE + +//TODO: is this still valid? +Use the `TIMESTAMP` type instead of `DATE` whenever possible. Presently, the `DATE` type is serialized/deserialized very +inefficiently resulting in performance degradation. diff --git a/docs/_docs/perf-and-troubleshooting/thread-pools-tuning.adoc b/docs/_docs/perf-and-troubleshooting/thread-pools-tuning.adoc new file mode 100644 index 0000000..f9d0555 --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/thread-pools-tuning.adoc @@ -0,0 +1,103 @@ += Thread Pools Tuning + +Ignite creates and maintains a variety of thread pools that are used for different purposes. In this section, we list some of the more common internal pools and show how you can create a custom one. 
+ +//// +Refer to the *TODO Link to APIs/Javadoc/etc.* APIs documentation to get a full list of thread pools available in Ignite. +//// + +== System Pool + +The system pool handles all the cache related operations except for SQL and some other types of queries that go to the queries pool. +Also, this pool is responsible for processing compute tasks' cancellation operations. + +The default pool size is `max(8, total number of cores)`. +Use `IgniteConfiguration.setSystemThreadPoolSize(...)` or a similar API from your programming language to change the pool size. + +== Queries Pool + +The queries pool takes care of all SQL, Scan, and SPI queries being sent and executed across the cluster. + +The default pool size is `max(8, total number of cores)`. +Use `IgniteConfiguration.setQueryThreadPoolSize(...)` or a similar API from your programming language to change the pool size. + +== Public Pool + +Public pool is the work-horse of the Compute Grid. All computations are received and processed by this pool. + +The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setPublicThreadPoolSize(...)` or a similar API from your programming language to change the pool size. + +== Service Pool + +Service Grid calls go to the services' thread pool. +Having dedicated pools for the Service and Compute components allows us to avoid threads starvation and deadlocks when a service implementation wants to call a computation or vice versa. + +The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setServiceThreadPoolSize(...)` or a similar API from your programming language to change the pool size. + +== Striped Pool + +The striped pool helps accelerate basic cache operations and transactions by spreading operations execution across multiple stripes that don't contend with each other for resources. + +The default pool size is `max(8, total number of cores)`. 
Use `IgniteConfiguration.setStripedPoolSize(...)` or a similar API from your programming language to change the pool size. + +== Data Streamer Pool + +The data streamer pool processes all messages and requests coming from `IgniteDataStreamer` and a variety of streaming adapters that use `IgniteDataStreamer` internally. + +The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setDataStreamerThreadPoolSize(...)` or a similar API from your programming language to change the pool size. + +== Creating Custom Thread Pool + +It is possible to configure a custom thread pool for compute tasks. +This is useful if you want to execute one compute task from another synchronously avoiding deadlocks. +To guarantee this, you need to make sure that a nested task is executed in a thread pool separate from the parent's tasks thread pool. + +A custom pool is defined in `IgniteConfiguration` and must have a unique name: + +:javaFile: code-snippets/java/src/main/java/org/apache/ignite/snippets/CustomThreadPool.java + +[tabs] +-- +tab:XML[] + +[source, xml] +---- +include::code-snippets/xml/thread-pool.xml[tags=ignite-config;!discovery,indent=0] +---- + +tab:Java[] + +[source, java] +---- +include::{javaFile}[tags=pool-config,indent=0] +---- +-- + +Now, let's assume that you want to execute the following compute task in a thread from the `myPool` defined above: + +[source,java] +---- +include::{javaFile}[tags=inner-runnable,indent=0] +---- + +To do that, use `IgniteCompute.withExecutor()`, which will execute the task immediately from the parent task, as shown below: + +[source,java] +---- +include::{javaFile}[tags=outer-runnable,indent=0] +---- + +The parent task's execution might be triggered the following way and, in this scenario, it will be executed by the public pool: + +[source,java] +---- +ignite.compute().run(new OuterRunnable()); +---- + +[WARNING] +==== +[discrete] +=== Undefined Thread Pool + +If an application attempts to execute a compute task in 
a custom pool which is not defined in the configuration of the node, then a special warning message will be printed to the logs, and the task will be picked up by the public pool for execution. +==== diff --git a/docs/_docs/perf-and-troubleshooting/troubleshooting.adoc b/docs/_docs/perf-and-troubleshooting/troubleshooting.adoc new file mode 100644 index 0000000..9fd549cc --- /dev/null +++ b/docs/_docs/perf-and-troubleshooting/troubleshooting.adoc @@ -0,0 +1,150 @@ += Troubleshooting and Debugging + +This article covers some common tips and tricks for debugging and troubleshooting Ignite deployments. + +== Debugging Tools: Consistency Check Command + +The `./control.sh|bat` utility includes a set of link:tools/control-script#consistency-check-commands[consistency check commands] +that help with verifying internal data consistency invariants. + +== Persistence Files Disappear on Restart + +On some systems, the default location for Ignite persistence files might be under a `temp` folder. This can lead to situations when persistence files are removed by an operating system whenever a node process is restarted. To avoid this: + +* Ensure that `WARN` logging level is enabled for Ignite. You will see a warning if the persistence files are written to the temporary directory. +* Change the location of all persistence files using the `DataStorageConfiguration` APIs, such as `setStoragePath(...)`, +`setWalPath(...)`, and `setWalArchivePath(...)` + +== Cluster Doesn't Start After Field Type Changes + +When developing your application, you may need to change the type of a custom +object’s field. For instance, let’s say you have object `A` with field `A.range` of + `int` type and then you decide to change the type of `A.range` to `long` right in + the source code. When you do this, the cluster or the application will fail to + restart because Ignite doesn't support field/column type changes. 
+ +When this happens _and you are still in development_, you need to go into the +file system and remove the following directories: `marshaller/`, `db/`, and `wal/` +located in the Ignite working directory (`db` and `wal` might be located in other +places if you have redefined their location). + +However, if you are _in production_ then instead of changing field types, add a +new field with a different name to your object model and remove the old one. This operation is fully +supported. At the same time, the `ALTER TABLE` command can be used to add new +columns or remove existing ones at run time. + +== Debugging GC Issues + +This section contains information that may be helpful when you need to debug and +troubleshoot issues related to Java heap usage or GC pauses. + +=== Heap Dumps + +If the JVM throws `OutOfMemoryError` errors, use the settings below to dump the heap automatically the next time the error occurs. +This helps if the root cause of the error is not clear and a deeper look at the heap state at the moment of failure is required: + +++++ + + +++++ +[source,shell] +---- +-XX:+HeapDumpOnOutOfMemoryError +-XX:HeapDumpPath=/path/to/heapdump +-XX:OnOutOfMemoryError="kill -9 %p" +-XX:+ExitOnOutOfMemoryError +---- +++++ + + +++++ + +=== Detailed GC Logs + +In order to capture detailed information about GC related activities, make sure you have the settings below configured +in the JVM settings of your cluster nodes: + +++++ + + +++++ +[source,shell] +---- +-XX:+PrintGCDetails +-XX:+PrintGCTimeStamps +-XX:+PrintGCDateStamps +-XX:+UseGCLogFileRotation +-XX:NumberOfGCLogFiles=10 +-XX:GCLogFileSize=100M +-Xloggc:/path/to/gc/logs/log.txt +---- +++++ + + +++++ + +Replace `/path/to/gc/logs/` with an actual path on your file system. + +In addition, for G1 collector set the property below. 
It provides many additional details that are +purposefully not included in the `-XX:+PrintGCDetails` setting: + +++++ + + +++++ +[source,shell] +---- +-XX:+PrintAdaptiveSizePolicy +---- +++++ + + +++++ + +=== Performance Analysis With Flight Recorder + +In cases when you need to debug performance or memory issues you can use Java Flight Recorder to continuously +collect low level runtime statistics, enabling after-the-fact incident analysis. To enable Java Flight Recorder use the +following settings: + +++++ + + +++++ +[source,shell] +---- +-XX:+UnlockCommercialFeatures +-XX:+FlightRecorder +-XX:+UnlockDiagnosticVMOptions +-XX:+DebugNonSafepoints +---- +++++ + + +++++ + +To start recording the state on a particular Ignite node use the following command: + +++++ + + +++++ +[source,shell] +---- +jcmd <PID> JFR.start name=<recording_name> duration=60s filename=/var/recording/recording.jfr settings=profile +---- +++++ + + +++++ + +For Flight Recorder related details refer to Oracle's official documentation. + +=== JVM Pauses + +Occasionally you may see a warning message about the JVM being paused for too long. It can happen during bulk loading, for example. + +Adjusting the `IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD` timeout setting may give the process time to finish without generating the warning. You can set the threshold via an environment variable, or pass it as a JVM argument (`-DIGNITE_JVM_PAUSE_DETECTOR_THRESHOLD=5000`) or as a parameter to ignite.sh (`-J-DIGNITE_JVM_PAUSE_DETECTOR_THRESHOLD=5000`). + +The value is in milliseconds. + diff --git a/docs/_docs/thread-pools.adoc b/docs/_docs/thread-pools.adoc deleted file mode 100644 index 5d1f9fb..0000000 --- a/docs/_docs/thread-pools.adoc +++ /dev/null @@ -1,136 +0,0 @@ -= Thread Pools - -== Overview - -Apache Ignite creates and maintains a variety of Thread pools that are used for different purposes depending on the -API being used. 
In this documentation, we list some of the well-known internal pools and show how you can create a -custom one. Refer to `IgniteConfiguration` javadoc to get a full list of thread pools available in Apache Ignite. - -== System Pool - -The system pool processes all the cache related operations except for SQL and some other types of queries. Also, this pool is -responsible for processing of Ignite Compute tasks' cancellation operations. - -The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setSystemThreadPoolSize(...)` to change the pool size. - -== Public Pool - -The public pool is the work-horse of Apache Ignite compute grid. All computations are received and processed by this pool. - -The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setPublicThreadPoolSize(...)` to change the pool size. - -== Queries Pool - -The queries pool takes care of all SQL, Scan and SPI queries that are being sent and executed across the cluster. - -The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setQueryThreadPoolSize(...)` to change the pool size. - -== Services Pool - -Apache Ignite Service Grid calls go to the services' thread pool. Having dedicated pools for Ignite Service and -Compute Grid components allows us to avoid threads starvation and deadlocks when a service implementation wants to call a computation or vice versa. - -The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setServiceThreadPoolSize(...)` to change the pool size. - -== Striped Pool - -The striped pool helps to accelerate basic cache operations and transactions significantly by spreading the operations -execution across multiple stripes that don't contend with each other. - -The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setStripedPoolSize(...)` to change the pool size. 
- -== Data Streamer Pool - -The data streamer pool processes all messages and requests coming from `IgniteDataStreamer` and a variety of streaming -adapters that use `IgniteDataStreamer` internally. - -The default pool size is `max(8, total number of cores)`. Use `IgniteConfiguration.setDataStreamerThreadPoolSize(...)` to change the pool size. - -== Custom Thread Pools - -It is possible to configure a custom thread pool for Ignite Compute tasks. This is useful if you want to execute one -compute task from another synchronously avoiding deadlocks. To guarantee this, you need to make sure that a nested -task is executed in a thread pool different from the parent's tasks thread pool. - -A custom pool is defined in `IgniteConfiguration` and has to have a unique name: - -[tabs] --- -tab:Java[] -[source,java] ----- -IgniteConfiguration cfg = ...; - -cfg.setExecutorConfiguration(new ExecutorConfiguration("myPool").setSize(16)); ----- -tab:XML[] -[source,xml] ----- - - ... - - - - - - - - - ... - ----- --- - -Now, let's assume that an Ignite Compute task below has to be executed in the `myPool` defined above: - -[tabs] --- -tab:Java[] -[source,java] ----- -public class InnerRunnable implements IgniteRunnable { - @Override public void run() { - System.out.println("Hello from inner runnable!"); - } -} ----- --- - -To do that, you need to use the `IgniteCompute.withExecutor()` method that will execute the task right away from an -implementation of the parent task, as shown below: - -[tabs] --- -tab:Java[] -[source,java] ----- -public class OuterRunnable implements IgniteRunnable { - @IgniteInstanceResource - private Ignite ignite; - - @Override public void run() { - // Synchronously execute InnerRunnable in custom executor. 
- ignite.compute().withExecutor("myPool").run(new InnerRunnable()); - } -} ----- --- - -The parent task's execution might be triggered the following way and, in this scenario, it will be executed by the public pool size: - -[tabs] --- -tab:Java[] -[source,java] ----- -ignite.compute().run(new OuterRunnable()); ----- --- - -[CAUTION] -==== -[discrete] -=== Undefined Thread Pool -If you attempt to execute a compute task in a custom thread pool that is not explicitly configured with Ignite, -then a special warning message will be printed in the node's logs, and the task will be picked up by the public pool for the execution. -====