ignite-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From styriver <Scott_Tyri...@mgic.com>
Subject Configuration Recommendations after ESX maintenance failure
Date Fri, 24 Feb 2017 21:44:22 GMT
Hello 

I am looking for some general recommendations concerning configuration in a
VM environment. We ran into an issue while our network people were doing
some ESX maintenance; they confirmed that one of our hosts "moved" at the
time we detected a segmentation failure. I am including our current
configuration along with the errors we captured. We were thinking of
changing the segmentation policy, but we are not sure which timeouts, if
any, we should change.

Configuration Dump:

Starting ignite with following configuration "IgniteConfiguration
[gridName=null, pubPoolSize=16, callbackPoolSize=16, sysPoolSize=16,
mgmtPoolSize=4, igfsPoolSize=2, utilityCachePoolSize=16,
utilityCacheKeepAliveTime=60000, marshCachePoolSize=16,
marshCacheKeepAliveTime=60000, p2pPoolSize=2, igniteHome=null,
igniteWorkDir=null, mbeanSrv=null, nodeId=null, marsh=null,
marshLocJobs=false, daemon=false, p2pEnabled=false, netTimeout=5000,
sndRetryDelay=1000, sndRetryCnt=3, clockSyncSamples=8, clockSyncFreq=120000,
metricsHistSize=10000, metricsUpdateFreq=2000,
metricsExpTime=9223372036854775807, discoSpi=TcpDiscoverySpi
[addrRslvr=null, sockTimeout=0, ackTimeout=0, marsh=JdkMarshaller [],
reconCnt=10, maxAckTimeout=600000, forceSrvMode=false,
clientReconnectDisabled=false], segPlc=STOP, segResolveAttempts=2,
waitForSegOnStart=true, allResolversPassReq=true, segChkFreq=10000,
commSpi=TcpCommunicationSpi [connectGate=null,
srvLsnr=org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi$2@211ff379,
locAddr=null, locHost=null, locPort=47100, locPortRange=100,
shmemPort=48100, directBuf=true, directSndBuf=false, idleConnTimeout=30000,
connTimeout=5000, maxConnTimeout=600000, reconCnt=10, sockSndBuf=32768,
sockRcvBuf=32768, msgQueueLimit=1024, slowClientQueueLimit=0, nioSrvr=null,
shmemSrv=null, tcpNoDelay=true, ackSndThreshold=16, unackedMsgsBufSize=0,
sockWriteTimeout=4000, lsnr=null, boundTcpPort=-1, boundTcpShmemPort=-1,
selectorsCnt=2, addrRslvr=null, rcvdMsgsCnt=0, sentMsgsCnt=0,
rcvdBytesCnt=0, sentBytesCnt=0,
ctxInitLatch=java.util.concurrent.CountDownLatch@1eff331d[Count = 1],
stopping=false,
metricsLsnr=org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi$3@2111dd2c],
evtSpi=null, colSpi=null, deploySpi=null, swapSpaceSpi=null,
indexingSpi=null, addrRslvr=null, clientMode=null,
rebalanceThreadPoolSize=1,
txCfg=org.apache.ignite.configuration.TransactionConfiguration@7f4b764c,
cacheSanityCheckEnabled=true, discoStartupDelay=60000, deployMode=SHARED,
p2pMissedCacheSize=100, locHost=null, timeSrvPortBase=31100,
timeSrvPortRange=100, failureDetectionTimeout=10000, metricsLogFreq=60000,
hadoopCfg=null,
connectorCfg=org.apache.ignite.configuration.ConnectorConfiguration@184bc563,
odbcCfg=null, warmupClos=null, atomicCfg=AtomicConfiguration
[seqReserveSize=1000, cacheMode=PARTITIONED, backups=0], classLdr=null,
sslCtxFactory=null, platformCfg=null, binaryCfg=null,
lateAffAssignment=true]

Log File:
16:03:57.566 [tcp-disco-msg-worker-#2%null%] WARN 
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi - Timed out waiting for
message delivery receipt (most probably, the reason is in long GC pauses on
remote node; consider tuning GC and increasing 'ackTimeout' configuration
property). Will retry to send message with increased timeout. Current
timeout: 10000.
16:03:57.568 [tcp-disco-msg-worker-#2%null%] WARN 
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi - Failed to send message
to next node [msg=TcpDiscoveryStatusCheckMessage
[creatorNode=TcpDiscoveryNode [id=c3da99ae-456d-4b52-85e6-c24587fbf08e,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=183,
intOrder=102, lastExchangeTime=1487887425492, loc=true,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false], failedNodeId=null,
status=0, super=TcpDiscoveryAbstractMessage [sndNodeId=null,
id=f9c0ee85a51-c3da99ae-456d-4b52-85e6-c24587fbf08e, verifierNodeId=null,
topVer=0, pendingIdx=0, failedNodes=null, isClient=false]],
next=TcpDiscoveryNode [id=95e261af-ce55-4843-8d9d-8bf046cc4118,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=117,
intOrder=69, lastExchangeTime=1487538461673, loc=false,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false], errMsg=Failed to send
message to next node [msg=TcpDiscoveryStatusCheckMessage
[creatorNode=TcpDiscoveryNode [id=c3da99ae-456d-4b52-85e6-c24587fbf08e,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=183,
intOrder=102, lastExchangeTime=1487887425492, loc=true,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false], failedNodeId=null,
status=0, super=TcpDiscoveryAbstractMessage [sndNodeId=null,
id=f9c0ee85a51-c3da99ae-456d-4b52-85e6-c24587fbf08e, verifierNodeId=null,
topVer=0, pendingIdx=0, failedNodes=null, isClient=false]], next=ClusterNode
[id=95e261af-ce55-4843-8d9d-8bf046cc4118, order=117,
addr=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163], daemon=false]]]
16:03:57.594 [tcp-disco-msg-worker-#2%null%] WARN 
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi - Local node has
detected failed nodes and started cluster-wide procedure. To speed up
failure detection please see 'Failure Detection' section under javadoc for
'TcpDiscoverySpi'
16:03:57.601 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=95e261af-ce55-4843-8d9d-8bf046cc4118,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=117,
intOrder=69, lastExchangeTime=1487538461673, loc=false,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false]
16:03:57.603 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=206, servers=1, clients=19, CPUs=10, heap=86.0GB]
16:04:05.670 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=5b65d779-8317-496c-b8f2-cdc4de33705c,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=50, intOrder=30,
lastExchangeTime=1487538461443, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:05.672 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=207, servers=1, clients=18, CPUs=10, heap=85.0GB]
16:04:06.106 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=a9104ef7-da59-4575-acdc-dd98fece82c5,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=141, intOrder=81,
lastExchangeTime=1487538461633, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:06.112 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=208, servers=1, clients=17, CPUs=10, heap=81.0GB]
16:04:06.233 [grid-nio-worker-1-#11%null%] WARN 
org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi - Communication
SPI Session write timed out (consider increasing 'socketWriteTimeout'
configuration property) [remoteAddr=/172.22.190.163:42506,
writeTimeout=4000]
16:04:06.538 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=9c972234-7c36-40e9-88f2-bc9da61a02ec,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=145, intOrder=83,
lastExchangeTime=1487538461633, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:06.540 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=209, servers=1, clients=16, CPUs=10, heap=76.0GB]
16:04:06.832 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=1addd9a8-7022-4286-a321-d4a5aef5985b,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=156, intOrder=84,
lastExchangeTime=1487538461673, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:06.848 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=210, servers=1, clients=15, CPUs=10, heap=76.0GB]
16:04:06.998 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=6e92ece8-f7bc-482b-bfa6-a25276bed9cd,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=173, intOrder=94,
lastExchangeTime=1487538461673, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:07.003 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=211, servers=1, clients=14, CPUs=10, heap=71.0GB]
16:04:07.104 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=b3d132d6-b315-4be3-b9ac-9ef427cef0b7,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=196, intOrder=108,
lastExchangeTime=1487818281767, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:07.110 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=212, servers=1, clients=13, CPUs=10, heap=69.0GB]
16:04:07.204 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=1d1cae7c-fa06-49ed-b50a-f39146c8b06b,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=202, intOrder=111,
lastExchangeTime=1487818535575, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:07.209 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=213, servers=1, clients=12, CPUs=10, heap=65.0GB]
16:04:07.261 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=79379bea-a740-4385-a2da-73d29d4d658c,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.163],
sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=205, intOrder=113,
lastExchangeTime=1487819123943, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:07.264 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=214, servers=1, clients=11, CPUs=10, heap=62.0GB]
16:04:11.795 [exchange-worker-#22%null%] WARN 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Failed to send partitions full message [node=TcpDiscoveryNode
[id=9a79ea6a-487e-4296-81a5-483ed93ceaeb, addrs=[0:0:0:0:0:0:0:1%lo,
127.0.0.1, 172.22.190.163], sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0,
/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0], discPort=0, order=198, intOrder=109,
lastExchangeTime=1487818313317, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true], err=class o.a.i.IgniteCheckedException: Failed to send
message (node may have left the grid or TCP connection cannot be established
due to firewall issues) [node=TcpDiscoveryNode
[id=9a79ea6a-487e-4296-81a5-483ed93ceaeb, addrs=[0:0:0:0:0:0:0:1%lo,
127.0.0.1, 172.22.190.163], sockAddrs=[rh6-pr-ho-busacq-02/172.22.190.163:0,
/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0], discPort=0, order=198, intOrder=109,
lastExchangeTime=1487818313317, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true], topic=TOPIC_CACHE, msg=GridDhtPartitionsFullMessage
[parts={-2100569601=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=886,
size=1], 689859866=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=1293,
size=1], -1728077271=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=1300,
size=1], 1597441201=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=1299,
size=1], 524260103=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=1301,
size=1], -667441411=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=1302,
size=1], 1325947219=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=806,
size=1], -486088806=GridDhtPartitionFullMap
[nodeId=c3da99ae-456d-4b52-85e6-c24587fbf08e, nodeOrder=183, updateSeq=1299,
size=1]}, partCntrs=null, topVer=AffinityTopologyVersion [topVer=-1,
minorTopVer=0], compress=true, partCnt=8,
super=GridDhtPartitionsAbstractMessage [exchId=null, lastVer=null, flags=1,
super=GridCacheMessage [msgId=10020813, depInfo=null, err=null,
skipPrepare=false, cacheId=0, cacheId=0]]], policy=2]]
16:04:11.861 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=206, minorTopVer=0], evt=NODE_FAILED,
node=95e261af-ce55-4843-8d9d-8bf046cc4118]
16:04:11.894 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=207, minorTopVer=0], evt=NODE_FAILED,
node=5b65d779-8317-496c-b8f2-cdc4de33705c]
16:04:11.922 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=208, minorTopVer=0], evt=NODE_FAILED,
node=a9104ef7-da59-4575-acdc-dd98fece82c5]
16:04:11.940 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=209, minorTopVer=0], evt=NODE_FAILED,
node=9c972234-7c36-40e9-88f2-bc9da61a02ec]
16:04:11.953 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=210, minorTopVer=0], evt=NODE_FAILED,
node=1addd9a8-7022-4286-a321-d4a5aef5985b]
16:04:11.970 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=211, minorTopVer=0], evt=NODE_FAILED,
node=6e92ece8-f7bc-482b-bfa6-a25276bed9cd]
16:04:11.996 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=212, minorTopVer=0], evt=NODE_FAILED,
node=b3d132d6-b315-4be3-b9ac-9ef427cef0b7]
16:04:12.017 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=213, minorTopVer=0], evt=NODE_FAILED,
node=1d1cae7c-fa06-49ed-b50a-f39146c8b06b]
16:04:12.076 [exchange-worker-#22%null%] INFO 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion
[topVer=214, minorTopVer=0], evt=NODE_FAILED,
node=79379bea-a740-4385-a2da-73d29d4d658c]
16:04:15.323 [grid-nio-worker-1-#11%null%] WARN 
org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi - Failed to
process selector key (will close): GridSelectorNioSessionImpl
[selectorIdx=1, queueSize=0, writeBuf=java.nio.DirectByteBuffer[pos=0
lim=32768 cap=32768], readBuf=java.nio.DirectByteBuffer[pos=0 lim=32768
cap=32768], recovery=null, super=GridNioSessionImpl
[locAddr=/172.22.190.162:47108, rmtAddr=/172.22.190.163:38916,
createTime=1487887452150, closeTime=0, bytesSent=2182, bytesRcvd=1910,
sndSchedTime=1487887455302, lastSndTime=1487887455322,
lastRcvTime=1487887455322, readsPaused=false,
filterChain=FilterChain[filters=[GridNioCodecFilter
[parser=o.a.i.i.util.nio.GridDirectParser@6357fa17, directMode=true],
GridConnectionBytesVerifyFilter, SSL filter], accepted=true]]
16:04:15.324 [grid-nio-worker-1-#11%null%] WARN 
org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi - Closing NIO
session because of unhandled exception [cls=class
o.a.i.i.util.nio.GridNioException, msg=Failed to create message writer.]
16:04:19.502 [tcp-disco-msg-worker-#2%null%] WARN 
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi - Node is out of
topology (probably, due to short-time network problems).
16:04:19.502 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Local
node SEGMENTED: TcpDiscoveryNode [id=c3da99ae-456d-4b52-85e6-c24587fbf08e,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=183,
intOrder=102, lastExchangeTime=1487887459497, loc=true,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false]
16:04:19.577 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Stopping local node according to configured segmentation policy.
16:04:19.583 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=69633095-feec-48fb-b96d-23a1a7b9b620,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=166, intOrder=91,
lastExchangeTime=1487538461663, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:19.589 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=215, servers=1, clients=10, CPUs=10, heap=61.0GB]
16:04:19.603 [Thread-6376] INFO 
org.apache.ignite.internal.processors.rest.protocols.tcp.GridTcpRestProtocol
- Command protocol successfully stopped: TCP binary
16:04:19.608 [exchange-worker-#22%null%] ERROR
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture
- Failed to reinitialize local partitions (preloading will be stopped):
GridDhtPartitionExchangeId [topVer=AffinityTopologyVersion [topVer=215,
minorTopVer=0], nodeId=69633095, evt=NODE_FAILED]
java.lang.IllegalStateException: Failed to process swap event (grid is
stopping).
	at
org.apache.ignite.internal.processors.query.GridQueryProcessor.onUnswap(GridQueryProcessor.java:1235)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.query.GridCacheQueryManager.onUnswap(GridCacheQueryManager.java:394)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheMapEntry.evictFailed(GridCacheMapEntry.java:4315)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheMapEntry.evictInternal(GridCacheMapEntry.java:4212)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheEvictionManager.evict0(GridCacheEvictionManager.java:709)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheEvictionManager.touch(GridCacheEvictionManager.java:798)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheMvccManager.removeExplicitNodeLocks(GridCacheMvccManager.java:330)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onLeft(GridDhtPartitionsExchangeFuture.java:831)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onClientNodeEvent(GridDhtPartitionsExchangeFuture.java:614)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:466)
[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:1656)
[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
[ignite-core-1.8.0.jar:1.8.0]
	at java.lang.Thread.run(Thread.java:745) [?:1.8.0_66]
16:04:19.609 [exchange-worker-#22%null%] ERROR
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager
- Failed to wait for completion of partition map exchange (preloading will
not start): GridDhtPartitionsExchangeFuture [dummy=false,
forcePreload=false, reassign=false, discoEvt=DiscoveryEvent
[evtNode=TcpDiscoveryNode [id=69633095-feec-48fb-b96d-23a1a7b9b620,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=166, intOrder=91,
lastExchangeTime=1487538461663, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true], topVer=215, nodeId8=c3da99ae, msg=Node failed:
TcpDiscoveryNode [id=69633095-feec-48fb-b96d-23a1a7b9b620,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=166, intOrder=91,
lastExchangeTime=1487538461663, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true], type=NODE_FAILED, tstamp=1487887459589],
crd=TcpDiscoveryNode [id=c3da99ae-456d-4b52-85e6-c24587fbf08e,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=183,
intOrder=102, lastExchangeTime=1487887459497, loc=true,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false],
exchId=GridDhtPartitionExchangeId [topVer=AffinityTopologyVersion
[topVer=215, minorTopVer=0], nodeId=69633095, evt=NODE_FAILED], added=true,
initFut=GridFutureAdapter [resFlag=2, res=false, startTime=1487887459589,
endTime=1487887459599, ignoreInterrupts=false, state=DONE], init=false,
topSnapshot=null, lastVer=null, partReleaseFut=null, affChangeMsg=null,
skipPreload=false, clientOnlyExchange=false, initTs=1487887459589,
centralizedAff=false, evtLatch=0, remaining=[], srvNodes=[TcpDiscoveryNode
[id=c3da99ae-456d-4b52-85e6-c24587fbf08e, addrs=[0:0:0:0:0:0:0:1%lo,
127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:47500,
/0:0:0:0:0:0:0:1%lo:47500, /127.0.0.1:47500], discPort=47500, order=183,
intOrder=102, lastExchangeTime=1487887459497, loc=true,
ver=1.8.0#20161205-sha1:9ca40dbe, isClient=false]], super=GridFutureAdapter
[resFlag=1, res=java.lang.IllegalStateException: Failed to process swap
event (grid is stopping)., startTime=1487887459589, endTime=1487887459599,
ignoreInterrupts=false, state=DONE]]
org.apache.ignite.IgniteCheckedException: Failed to process swap event (grid
is stopping).
	at org.apache.ignite.internal.util.IgniteUtils.cast(IgniteUtils.java:7185)
[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:197)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:138)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:1662)
[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
[ignite-core-1.8.0.jar:1.8.0]
	at java.lang.Thread.run(Thread.java:745) [?:1.8.0_66]
Caused by: java.lang.IllegalStateException: Failed to process swap event
(grid is stopping).
	at
org.apache.ignite.internal.processors.query.GridQueryProcessor.onUnswap(GridQueryProcessor.java:1235)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.query.GridCacheQueryManager.onUnswap(GridCacheQueryManager.java:394)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheMapEntry.evictFailed(GridCacheMapEntry.java:4315)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheMapEntry.evictInternal(GridCacheMapEntry.java:4212)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheEvictionManager.evict0(GridCacheEvictionManager.java:709)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheEvictionManager.touch(GridCacheEvictionManager.java:798)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCacheMvccManager.removeExplicitNodeLocks(GridCacheMvccManager.java:330)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onLeft(GridDhtPartitionsExchangeFuture.java:831)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onClientNodeEvent(GridDhtPartitionsExchangeFuture.java:614)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:466)
~[ignite-core-1.8.0.jar:1.8.0]
	at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:1656)
~[ignite-core-1.8.0.jar:1.8.0]
	... 2 more
16:04:19.668 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=85d9bab7-916b-4a16-9044-f18ff7722e56,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=170, intOrder=92,
lastExchangeTime=1487538461673, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:19.677 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=216, servers=1, clients=9, CPUs=10, heap=61.0GB]
16:04:19.739 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=2e5d0db3-2b5b-4257-8d81-d31895ead461,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.162],
sockAddrs=[rh6-pr-ho-busacq-01/172.22.190.162:0, /0:0:0:0:0:0:0:1%lo:0,
/127.0.0.1:0], discPort=0, order=180, intOrder=100,
lastExchangeTime=1487538461683, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:19.750 [grid-nio-worker-1-#11%null%] WARN 
org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi - Communication
SPI Session write timed out (consider increasing 'socketWriteTimeout'
configuration property) [remoteAddr=/172.22.190.80:39238, writeTimeout=4000]
16:04:19.760 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=217, servers=1, clients=8, CPUs=10, heap=57.0GB]
16:04:19.760 [disco-event-worker-#18%null%] WARN 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager - Node
FAILED: TcpDiscoveryNode [id=79c207fe-504e-4bd3-8f1b-590a02219873,
addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 172.22.190.80],
sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0,
rh6-pr-ho-img-01/172.22.190.80:0], discPort=0, order=185, intOrder=103,
lastExchangeTime=1487818028251, loc=false, ver=1.8.0#20161205-sha1:9ca40dbe,
isClient=true]
16:04:19.777 [disco-event-worker-#18%null%] INFO 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager -
Topology snapshot [ver=218, servers=1, clients=7, CPUs=8, heap=45.0GB]
16:04:20.169 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: DocumentDataCache
16:04:20.172 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: RegistrationCache
16:04:20.174 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: ConversionStatusCache
16:04:20.435 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: ImageCache
16:04:20.436 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: ignite-marshaller-sys-cache
16:04:20.436 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: ignite-sys-cache
16:04:20.436 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: ignite-atomics-sys-cache
16:04:20.436 [Thread-6376] INFO 
org.apache.ignite.internal.processors.cache.GridCacheProcessor - Stopped
cache: LoanContainerCache
16:04:20.468 [Thread-6376] INFO  org.apache.ignite.internal.IgniteKernal - 

>>> +---------------------------------------------------------------------------------+
>>> Ignite ver. 1.8.0#20161205-sha1:9ca40dbeb7d559fcb299bdb6f5c90cdf8ce7e533
>>> stopped OK
>>> +---------------------------------------------------------------------------------+
>>> Grid uptime: 36:56:35:190

XML Config:
   <import resource="classpath:ignite-cacheDefs.xml"/>
    
    <!-- Node-level IgniteConfiguration bean: sets the logger, the communication SPI,
         a user attribute, the list of caches, the discovery SPI and the metrics log
         frequency. Everything not set here is left at its default. -->
    <bean id="ignite.cfg"
class="org.apache.ignite.configuration.IgniteConfiguration">    

		<!-- Send Ignite log output through SLF4J. -->
		<property name="gridLogger"> 
			<bean class="org.apache.ignite.logger.slf4j.Slf4jLogger" />
		</property>
		<!-- TCP communication SPI; only the socket write timeout is overridden. -->
		<property name="communicationSpi">
            <bean
class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
                <!-- 4000 is in milliseconds per the TcpCommunicationSpi Javadoc (4 seconds). -->
                <property name="socketWriteTimeout" value="4000"/>
            </bean>
        </property>
        <!-- Peer class loading is switched off. -->
		<property name="peerClassLoadingEnabled" value="false"/>
		<!-- Custom node attribute carrying the application name. -->
    	<property name="userAttributes">
        <map>
            <entry key="AppName" value="Bus Acq Cache Manager"/>
        </map>
    	</property>
    	<!-- Caches configured on this node. Each entry inherits from an abstract template
    	     bean via parent="..."; the templates are presumably the ones pulled in by the
    	     ignite-cacheDefs.xml import. Only four templates are listed here. -->
    	<property name="cacheConfiguration">
			<list>
				<bean parent="imageCache">           	
						<!-- SpEL expression: 10 * 1024 * 1024 * 1024 = 10737418240, i.e. 10 GiB when read as bytes. -->
					<property name="offHeapMaxMemory" value="#{10 * 1024L * 1024L * 1024L}"
/>

						<!-- Turn on statistics for this cache. -->
					<property name="statisticsEnabled" value="true"/> 
																																				
				</bean>
				
				<bean parent="conversionStatusCache">
				</bean>
				
				<bean parent="registrationCache">
				</bean>
				
				<bean parent="documentDataCache">
				</bean>								

		        
			</list>
		</property>
		
	        <!-- TCP discovery with a multicast IP finder. The multicast group address is
	             masked in this post. Only the IP finder is set; no discovery timeouts are
	             overridden in this bean. -->
        <property name="discoverySpi">
            <bean
class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="ipFinder">
                    <bean
class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder">
                        <property name="multicastGroup" value="xxx.xx.xx.x"
/>
                    </bean>
                </property>
            </bean>
        </property>
        <!-- 1800000 ms = 30 minutes between metrics log entries (unit per the IgniteConfiguration Javadoc). -->
        <property name="metricsLogFrequency" value="1800000" /> 
        
    </bean>    

Cache Definitions:

	<!-- Abstract CacheConfiguration template for ImageCache. It is never instantiated
	     directly (abstract="true"); a child bean inherits it via parent="imageCache". -->
	<bean id="imageCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
			<!-- Cache name as seen by the grid. -->
		<property name="name" value="ImageCache"/>

			<!-- Cache mode: REPLICATED. -->
		<property name="cacheMode" value="REPLICATED"/>
		
			<!-- Memory mode: OFFHEAP_TIERED. -->
		<property name="memoryMode" value="OFFHEAP_TIERED"/>
	
			<!-- Swap storage is disabled. -->
		<property name="swapEnabled" value="false"/> 	
		         <!-- Types registered for SQL indexing, given as a key type followed by a value type:
		              java.util.UUID keys and TiffPage values. -->
        <property name="indexedTypes">
			<util:list id="myList" value-type="java.lang.Class">
			    <value>java.util.UUID</value>
			    <value>com.mgic.documentviewer.imaging.cache.beans.TiffPage</value>
			</util:list>
       </property>
       
    	<!-- On-heap SQL row cache size set to 1; presumably chosen to keep rows off-heap (confirm intent). -->
		<property name="sqlOnheapRowCacheSize" value="1" />
					
       		<!-- Atomicity mode: TRANSACTIONAL. -->
		<property name="atomicityMode" value="TRANSACTIONAL" />
		      		
	</bean>
	
	<!-- Abstract CacheConfiguration template for RegistrationCache; inherited by a child
	     bean via parent="registrationCache". -->
	<bean id="registrationCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
			<!-- Cache name as seen by the grid. -->
		<property name="name" value="RegistrationCache"/>
		
			<!-- Cache mode: REPLICATED. -->
		<property name="cacheMode" value="REPLICATED"/>

			<!-- Memory mode: ONHEAP_TIERED. -->
		<property name="memoryMode" value="ONHEAP_TIERED"/>
	
			<!-- Swap storage is disabled. -->
		<property name="swapEnabled" value="false"/> 	
		
			<!-- Atomicity mode: TRANSACTIONAL. -->
		<property name="atomicityMode" value="TRANSACTIONAL" />
	</bean>		

	<!-- Abstract CacheConfiguration template for DocumentDataCache; inherited by a child
	     bean via parent="documentDataCache". -->
	<bean id="documentDataCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
			<!-- Cache name as seen by the grid. -->
		<property name="name" value="DocumentDataCache"/>
		
			<!-- Cache mode: REPLICATED. -->
		<property name="cacheMode" value="REPLICATED"/>

			<!-- Memory mode: ONHEAP_TIERED. -->
		<property name="memoryMode" value="ONHEAP_TIERED"/>
	
			<!-- Swap storage is disabled. -->
		<property name="swapEnabled" value="false"/> 	
		
			<!-- Atomicity mode: TRANSACTIONAL. -->
		<property name="atomicityMode" value="TRANSACTIONAL" />
	</bean>		
	
	<!-- Abstract CacheConfiguration template for ConversionStatusCache; inherited by a
	     child bean via parent="conversionStatusCache". -->
	<bean id="conversionStatusCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
			<!-- Cache name as seen by the grid. -->
		<property name="name" value="ConversionStatusCache"/>
		
			<!-- Cache mode: REPLICATED. -->
		<property name="cacheMode" value="REPLICATED"/>

			<!-- Memory mode: ONHEAP_TIERED. -->
		<property name="memoryMode" value="ONHEAP_TIERED"/>
	
			<!-- Swap storage is disabled. -->
		<property name="swapEnabled" value="false"/> 	
		
			<!-- Atomicity mode: TRANSACTIONAL. -->
		<property name="atomicityMode" value="TRANSACTIONAL" />
	</bean>	
	
	<!-- Abstract CacheConfiguration template for LoanContainerCache.
	     NOTE(review): this template is not referenced by the cacheConfiguration list of the
	     ignite.cfg bean in this post, yet the shutdown log mentions LoanContainerCache, so
	     the cache is presumably started somewhere else; confirm where. -->
	<bean id="loanContainerCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
	    <!-- Cache name as seen by the grid. -->
	  <property name="name" value="LoanContainerCache"/>
	  
	    <!-- Cache mode: REPLICATED. -->
	  <property name="cacheMode" value="REPLICATED"/>
	
	    <!-- Memory mode: ONHEAP_TIERED. -->
	  <property name="memoryMode" value="ONHEAP_TIERED"/>
	
	    <!-- Swap storage is disabled. -->
	  <property name="swapEnabled" value="false"/>  
	  
	    <!-- Atomicity mode: TRANSACTIONAL. -->
	  <property name="atomicityMode" value="TRANSACTIONAL" />
 
	</bean>   	
	
	<!-- Abstract CacheConfiguration template for TestLockCache.
	     NOTE(review): not referenced by the cacheConfiguration list of the ignite.cfg bean
	     in this post; presumably used only by tests or started elsewhere. -->
	<bean id="testLockCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
	    <!-- Cache name as seen by the grid. -->
	  <property name="name" value="TestLockCache"/>
	  
	    <!-- Cache mode: REPLICATED. -->
	  <property name="cacheMode" value="REPLICATED"/>
	
	    <!-- Memory mode: ONHEAP_TIERED. -->
	  <property name="memoryMode" value="ONHEAP_TIERED"/>
	
	    <!-- Swap storage is disabled. -->
	  <property name="swapEnabled" value="false"/>  
	  
	    <!-- Atomicity mode: TRANSACTIONAL. -->
	  <property name="atomicityMode" value="TRANSACTIONAL" />
	  <!-- Write synchronization mode: FULL_SYNC (set explicitly on this template). -->
	  <property name="writeSynchronizationMode" value="FULL_SYNC"/>
  
	</bean>   
	
	<!-- Abstract CacheConfiguration template for TestTransactionCache.
	     NOTE(review): not referenced by the cacheConfiguration list of the ignite.cfg bean
	     in this post; presumably used only by tests or started elsewhere. -->
	<bean id="testTransactionCache" abstract="true"
class="org.apache.ignite.configuration.CacheConfiguration">           
	    <!-- Cache name as seen by the grid. -->
	  <property name="name" value="TestTransactionCache"/>
	  
	    <!-- Cache mode: REPLICATED. -->
	  <property name="cacheMode" value="REPLICATED"/>
	
	    <!-- Memory mode: ONHEAP_TIERED. -->
	  <property name="memoryMode" value="ONHEAP_TIERED"/>
	
	    <!-- Swap storage is disabled. -->
	  <property name="swapEnabled" value="false"/>  
	  
	    <!-- Atomicity mode: TRANSACTIONAL. -->
	  <property name="atomicityMode" value="TRANSACTIONAL" />
	  <!-- Write synchronization mode: FULL_SYNC (set explicitly on this template). -->
	  <property name="writeSynchronizationMode" value="FULL_SYNC"/>
  
	</bean>   			
	



--
View this message in context: http://apache-ignite-users.70518.x6.nabble.com/Configuration-Recommendations-after-ESX-maintenance-failure-tp10878.html
Sent from the Apache Ignite Users mailing list archive at Nabble.com.

Mime
View raw message