ignite-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "NO" <727418...@qq.com>
Subject Read request response time is unstable, often more than 500 milliseconds, but the cluster load is small
Date Thu, 10 May 2018 02:42:32 GMT
hi,

Ignite version : 2.4.0

Read operations often take more than 500 milliseconds even though the cluster traffic is
very light, and I cannot tell why. Please help me solve this problem. Thank you very much.
The relevant configuration information follows.

8 nodes: (48 cores, 192 GB RAM, 4 TB SSD)
Cluster records: 1.7 billion primary keys, 1.7 billion backup keys
Get requests per second: 100+
Put requests per second: 400+
Each node occupies more than 500 GB of disk space.

2 nodes:
LSB Version:    :core-4.1-amd64:core-4.1-noarch:cxx-4.1-amd64:cxx-4.1-noarch:desktop-4.1-amd64:desktop-4.1-noarch:languages-4.1-amd64:languages-4.1-noarch:printing-4.1-amd64:printing-4.1-noarch
Distributor ID:    CentOS
Description:    CentOS Linux release 7.2.1511 (Core) 
Release:    7.2.1511
Codename:    Core

6 nodes:
LSB Version:    :base-4.0-amd64:base-4.0-noarch:core-4.0-amd64:core-4.0-noarch:graphics-4.0-amd64:graphics-4.0-noarch:printing-4.0-amd64:printing-4.0-noarch
Distributor ID:    CentOS
Description:    CentOS release 6.7 (Final)
Release:    6.7
Codename:    Final
=========================================================================
The node configuration is as follows
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring bean definition for the Ignite server node ("ignite.cfg"). -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xsi:schemaLocation="
           http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
           http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd">

    <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">

        <!-- Failure detection timeouts and segmentation handling. -->
        <property name="failureDetectionTimeout" value="60000"/>
        <property name="clientFailureDetectionTimeout" value="60000"/>
        <property name="segmentationPolicy" value="RESTART_JVM"/>

        <!-- Thread pool sizes. -->
        <property name="publicThreadPoolSize" value="64"/>
        <property name="systemThreadPoolSize" value="64"/>
        <property name="dataStreamerThreadPoolSize" value="64"/>
        <property name="rebalanceThreadPoolSize" value="4"/>

        <!-- Storage: one persistent default data region plus WAL / checkpoint settings. -->
        <property name="dataStorageConfiguration">
            <bean class="org.apache.ignite.configuration.DataStorageConfiguration">

                <!-- Default data region; the cache below refers to it by name.
                     Sizes are SpEL arithmetic: 10 * 1024^3 initial, 100 * 1024^3 maximum. -->
                <property name="defaultDataRegionConfiguration">
                    <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                        <property name="name" value="qipu_entity_cache_data_region"/>
                        <property name="initialSize" value="#{10L * 1024 * 1024 * 1024}"/>
                        <property name="maxSize" value="#{100L * 1024 * 1024 * 1024}"/>
                        <property name="persistenceEnabled" value="true"/>
                        <property name="metricsEnabled" value="true"/>
                        <property name="checkpointPageBufferSize" value="#{1 * 1024 * 1024 * 1024}"/>
                    </bean>
                </property>

                <!-- Write-ahead log, page size and checkpointing. -->
                <property name="walSegmentSize" value="#{64 * 1024 * 1024}"/>
                <property name="pageSize" value="#{4 * 1024}"/>
                <property name="walSegments" value="#{20}"/>
                <property name="walMode" value="FSYNC"/>
                <property name="metricsEnabled" value="true"/>
                <property name="writeThrottlingEnabled" value="true"/>
                <property name="checkpointThreads" value="8"/>
                <property name="walThreadLocalBufferSize" value="#{1 * 1024 * 1024}"/>
            </bean>
        </property>

        <!-- The single cache: partitioned, atomic, one backup, fully synchronous writes. -->
        <property name="cacheConfiguration">
            <bean class="org.apache.ignite.configuration.CacheConfiguration">
                <property name="dataRegionName" value="qipu_entity_cache_data_region"/>
                <property name="name" value="qipu_entity_cache"/>
                <property name="cacheMode" value="PARTITIONED"/>
                <property name="partitionLossPolicy" value="IGNORE"/>
                <property name="atomicityMode" value="ATOMIC"/>
                <property name="backups" value="1"/>
                <property name="writeSynchronizationMode" value="FULL_SYNC"/>
                <property name="statisticsEnabled" value="true"/>

                <!-- Rebalancing settings. -->
                <property name="rebalanceBatchSize" value="#{20 * 1024 * 1024}"/>
                <property name="rebalanceThrottle" value="0"/>
                <property name="rebalanceMode" value="ASYNC"/>
                <property name="rebalanceBatchesPrefetchCount" value="4"/>
                <property name="rebalanceTimeout" value="20000"/>

                <property name="maxConcurrentAsyncOperations" value="#{4 * 500}"/>
            </bean>
        </property>

        <!-- Node-to-node communication. -->
        <property name="communicationSpi">
            <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
                <property name="messageQueueLimit" value="20480"/>
            </bean>
        </property>

        <!-- Discovery over a static list of the eight server addresses. -->
        <property name="discoverySpi">
            <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="forceServerMode" value="true"/>
                <property name="ipFinder">
                    <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                        <property name="addresses">
                            <list>
                                <!-- In distributed environment, replace with actual host IP address. -->
                                <value>10.13.13.39:47500..47509</value>
                                <value>10.13.13.49:47500..47509</value>
                                <value>10.13.13.50:47500..47509</value>
                                <value>10.13.13.51:47500..47509</value>
                                <value>10.13.13.59:47500..47509</value>
                                <value>10.13.13.60:47500..47509</value>
                                <value>10.13.13.61:47500..47509</value>
                                <value>10.13.13.63:47500..47509</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>

        <!-- Log4j2 logger, configured from the given file path. -->
        <property name="gridLogger">
            <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
                <constructor-arg type="java.lang.String" value="/home/qipu/production/apache-ignite-2.4.0/config/ignite-log4j2.xml"/>
            </bean>
        </property>
    </bean>
</beans>
=================================================================================================
#ignite.sh
JVM config
# Heap sizing and general HotSpot switches.
JVM_OPTS="-Xms24g -Xmx24g -server -XX:+AggressiveOpts -XX:MaxMetaspaceSize=512m -XX:+AlwaysPreTouch"
# Collector selection (G1) and GC behaviour switches.
JVM_OPTS="${JVM_OPTS} -XX:+UseG1GC -XX:+ScavengeBeforeFullGC -XX:+DisableExplicitGC"
# Heap dump on OutOfMemoryError, written under ${IGNITE_HOME}/work.
# (The doubled space keeps the resulting string identical to the original.)
JVM_OPTS="${JVM_OPTS} -XX:+HeapDumpOnOutOfMemoryError  -XX:HeapDumpPath=${IGNITE_HOME}/work"
# GC logging to ${IGNITE_HOME}/work/gc.log, rotated across 10 files of 100M.
JVM_OPTS="${JVM_OPTS} -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
JVM_OPTS="${JVM_OPTS} -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=100M"
JVM_OPTS="${JVM_OPTS} -Xloggc:${IGNITE_HOME}/work/gc.log -XX:+PrintAdaptiveSizePolicy"
# GC pause-time goal.
JVM_OPTS="${JVM_OPTS} -XX:MaxGCPauseMillis=100"
=====================================================================================================
node config
#/etc/sysctl.conf
# --- File handles ---
fs.file-max = 512000

# --- Core networking: socket buffers and queues ---
net.core.rmem_max = 67108864
net.core.wmem_max = 67108864
net.core.rmem_default = 65536
net.core.wmem_default = 65536
net.core.netdev_max_backlog = 4096
net.core.somaxconn = 4096

# --- IPv4 / TCP ---
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 0
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.ip_local_port_range = 10000 65000
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.tcp_max_tw_buckets = 5000
net.ipv4.tcp_rmem = 4096 87380 67108864
net.ipv4.tcp_wmem = 4096 65536 67108864
net.ipv4.tcp_mtu_probing = 1

# --- Virtual memory ---
vm.swappiness = 0
vm.zone_reclaim_mode = 0
vm.dirty_writeback_centisecs = 500
vm.dirty_expire_centisecs = 500
===============================================
#/etc/security/limits.conf
# Per-user resource limits for every account ("*" domain).
# Format: <domain> <type> <item> <value>
#
# Each item is declared exactly once. Where the pasted file repeated an item
# with different values (nofile: 65535 and 81920), the last value is kept,
# on the understanding that pam_limits lets a later entry for the same domain
# override an earlier one - confirm with `ulimit -n` after login.

# Open file descriptors and process count.
*       soft    nofile          81920
*       hard    nofile          81920
*       soft    nproc           81920
*       hard    nproc           81920

# Core file size.
*       soft    core            10240
*       hard    core            10240

# Data segment, stack, CPU time and locked memory: no cap.
*       soft    data            unlimited
*       hard    data            unlimited
*       soft    stack           unlimited
*       hard    stack           unlimited
*       soft    cpu             unlimited
*       hard    cpu             unlimited
*       soft    memlock         unlimited
*       hard    memlock         unlimited

# NOTE: "memory" is not a limits.conf item, so no "memory" entry appears
# here. If an address-space or resident-set limit is wanted, the items are
# "as" and "rss".

# End of file
===============================================

client code
==============================================
// Switch Ignition to client mode before the node is started.
Ignition.setClientMode(true);

// Static-IP discovery: the "ignite.server" property holds a comma-separated address list.
TcpDiscoveryVmIpFinder ipFinder = new TcpDiscoveryVmIpFinder();
String[] serverAddresses = env.getProperty("ignite.server").split(",");
ipFinder.setAddresses(Arrays.asList(serverAddresses));

TcpDiscoverySpi discoverySpi = new TcpDiscoverySpi();
discoverySpi.setIpFinder(ipFinder);

// Node configuration: SLF4J logging plus the discovery SPI built above.
IgniteConfiguration clientCfg = new IgniteConfiguration();
clientCfg.setGridLogger(new Slf4jLogger());
clientCfg.setDiscoverySpi(discoverySpi);

Ignite ignite = Ignition.start(clientCfg);
IgniteCache<String, byte[]> igniteCache = ignite.getOrCreateCache("qipu_entity_cache");

// get code [Read operation response time often exceeds 1s]
// Asynchronous bulk read; get(1000) bounds the wait on the returned future.
igniteCache.getAllAsync(keySet).get(1000);

// put code
// cache.putAllAsync(map).get(3000);
==============================================


Attached are one node's GC log and its node log.

Please give some suggestions on how to reduce the read operation response time. Thank you.
Mime
View raw message