Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 5D9CB200D45 for ; Wed, 8 Nov 2017 22:10:14 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 5C3731609E0; Wed, 8 Nov 2017 21:10:14 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 2C536160BDA for ; Wed, 8 Nov 2017 22:10:13 +0100 (CET) Received: (qmail 50190 invoked by uid 500); 8 Nov 2017 21:10:12 -0000 Mailing-List: contact issues-help@geode.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@geode.apache.org Delivered-To: mailing list issues@geode.apache.org Received: (qmail 50181 invoked by uid 99); 8 Nov 2017 21:10:12 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd3-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 08 Nov 2017 21:10:12 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd3-us-west.apache.org (ASF Mail Server at spamd3-us-west.apache.org) with ESMTP id 777771807A1 for ; Wed, 8 Nov 2017 21:10:11 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd3-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -99.202 X-Spam-Level: X-Spam-Status: No, score=-99.202 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, RP_MATCHES_RCVD=-0.001, SPF_PASS=-0.001, USER_IN_WHITELIST=-100] autolearn=disabled Received: from mx1-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd3-us-west.apache.org [10.40.0.10]) (amavisd-new, port 10024) with ESMTP id hZHbGlJBAzCo for ; Wed, 8 Nov 2017 21:10:06 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-lw-eu.apache.org (ASF Mail Server at mx1-lw-eu.apache.org) with 
ESMTP id 71BEA61579 for ; Wed, 8 Nov 2017 21:10:04 +0000 (UTC) Received: from jira-lw-us.apache.org (unknown [207.244.88.139]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id E9B27E0EEF for ; Wed, 8 Nov 2017 21:10:02 +0000 (UTC) Received: from jira-lw-us.apache.org (localhost [127.0.0.1]) by jira-lw-us.apache.org (ASF Mail Server at jira-lw-us.apache.org) with ESMTP id 4A947240EA for ; Wed, 8 Nov 2017 21:10:02 +0000 (UTC) Date: Wed, 8 Nov 2017 21:10:02 +0000 (UTC) From: "ASF GitHub Bot (JIRA)" To: issues@geode.apache.org Message-ID: In-Reply-To: References: Subject: [jira] [Commented] (GEODE-3940) Backup can hang while trying to get a lock MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 archived-at: Wed, 08 Nov 2017 21:10:14 -0000 [ https://issues.apache.org/jira/browse/GEODE-3940?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16244734#comment-16244734 ] ASF GitHub Bot commented on GEODE-3940: --------------------------------------- nreich commented on a change in pull request #1038: GEODE-3940: fix deadlock in backup messages URL: https://github.com/apache/geode/pull/1038#discussion_r149792718 ########## File path: geode-core/src/test/java/org/apache/geode/admin/internal/FinishBackupRequestTest.java ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.geode.admin.internal; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.InOrder; + +import org.apache.geode.admin.internal.FinishBackupRequest.FinishBackupReplyProcessor; +import org.apache.geode.cache.CacheClosedException; +import org.apache.geode.cache.persistence.PersistentID; +import org.apache.geode.distributed.DistributedMember; +import org.apache.geode.distributed.internal.DM; +import org.apache.geode.distributed.internal.ReplyException; +import org.apache.geode.distributed.internal.membership.InternalDistributedMember; +import org.apache.geode.internal.admin.remote.AdminFailureResponse; +import org.apache.geode.internal.admin.remote.AdminResponse; +import org.apache.geode.internal.cache.BackupManager; +import org.apache.geode.internal.cache.InternalCache; +import 
org.apache.geode.test.junit.categories.UnitTest; + +@Category(UnitTest.class) +public class FinishBackupRequestTest { + + private FinishBackupRequest finishBackupRequest; + + private FinishBackupReplyProcessor replyProcessor; + private DM dm; + private InternalCache cache; + private BackupManager backupManager; + private File targetDir; + private File baselineDir; + + private InternalDistributedMember localMember; + private InternalDistributedMember member1; + private InternalDistributedMember member2; + + private Set recipients; + + @Before + public void setUp() throws Exception { + // mocks here + replyProcessor = mock(FinishBackupReplyProcessor.class); + dm = mock(DM.class); + cache = mock(InternalCache.class); + backupManager = mock(BackupManager.class); + targetDir = mock(File.class); + baselineDir = mock(File.class); + + when(dm.getCache()).thenReturn(cache); + when(dm.getDistributionManagerId()).thenReturn(localMember); + when(cache.getBackupManager()).thenReturn(backupManager); + when(replyProcessor.getResults()).thenReturn(Collections.emptyMap()); + + localMember = mock(InternalDistributedMember.class); + member1 = mock(InternalDistributedMember.class); + member2 = mock(InternalDistributedMember.class); + + recipients = new HashSet<>(); + recipients.add(member1); + recipients.add(member2); + + finishBackupRequest = + new FinishBackupRequest(dm, recipients, replyProcessor, targetDir, baselineDir, false); + } + + @Test + public void getRecipientsReturnsRecipientMembers() throws Exception { + assertThat(finishBackupRequest.getRecipients()).hasSize(2).contains(member1, member2); + } + + @Test + public void getRecipientsDoesNotIncludeNull() throws Exception { + InternalDistributedMember nullMember = null; + + assertThat(finishBackupRequest.getRecipients()).doesNotContain(nullMember); + } + + @Test + public void sendShouldUseDMToSendMessage() throws Exception { + finishBackupRequest.send(); + + verify(dm, times(1)).putOutgoing(finishBackupRequest); + } + + @Test 
+ public void sendShouldWaitForRepliesFromRecipients() throws Exception { + finishBackupRequest.send(); + + verify(replyProcessor, times(1)).waitForReplies(); + } + + @Test + public void sendShouldReturnResultsContainingRecipientsAndLocalMember() throws Exception { + Set localMember_PersistentIdSet = new HashSet<>(); Review comment: Should stick to project standard camel-case for variable names. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org > Backup can hang while trying to get a lock > ------------------------------------------- > > Key: GEODE-3940 > URL: https://issues.apache.org/jira/browse/GEODE-3940 > Project: Geode > Issue Type: Bug > Components: persistence > Reporter: Lynn Gallinat > Assignee: Kirk Lund > Priority: Trivial > > {noformat} > Backup can hang when createKrf cannot get the compactor lock. 
> "Pooled Message Processor 2" #196 daemon prio=10 os_prio=0 tid=0x00007fef9801e000 nid=0x3cf3 waiting on condition [0x00007ff094cd5000] > java.lang.Thread.State: WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000000f1a72c60> (a java.util.concurrent.locks.ReentrantLock$NonfairSync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) > at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) > at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) > at java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209) > at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285) > at org.apache.geode.internal.cache.Oplog.lockCompactor(Oplog.java:6046) <============ trying to get compactor lock > at org.apache.geode.internal.cache.Oplog.createKrf(Oplog.java:4157) <============ in createKrf > at org.apache.geode.internal.cache.Oplog.finishKrf(Oplog.java:7746) > at org.apache.geode.internal.cache.BackupManager.backupOplog(BackupManager.java:580) > at org.apache.geode.internal.cache.BackupManager.completeBackup(BackupManager.java:270) > at org.apache.geode.internal.cache.BackupManager.doBackup(BackupManager.java:139) > at org.apache.geode.admin.internal.FinishBackupRequest.createResponse(FinishBackupRequest.java:102) > at org.apache.geode.internal.admin.remote.CliLegacyMessage.process(CliLegacyMessage.java:39) > at org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:374) > at org.apache.geode.distributed.internal.DistributionMessage$1.run(DistributionMessage.java:440) > at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at org.apache.geode.distributed.internal.DistributionManager.runUntilShutdown(DistributionManager.java:666) > at org.apache.geode.distributed.internal.DistributionManager$4$1.run(DistributionManager.java:923) > at java.lang.Thread.run(Thread.java:748) > The compactor thread already has the compactor lock. > It is waiting for a DiskEntry lock. > "Idle OplogCompactor" #379 daemon prio=10 os_prio=0 tid=0x00007ff020026000 nid=0x6916 waiting for monitor entry [0x00007ff08c7ce000] > java.lang.Thread.State: BLOCKED (on object monitor) > at org.apache.geode.internal.cache.Oplog.writeOneKeyEntryForKRF(Oplog.java:3924) > - waiting to lock <0x00000000f1c55c70> (a org.apache.geode.internal.cache.entries.VersionedThinDiskRegionEntryOffHeapStringKey2) <====== waiting for DiskEntry lock > at org.apache.geode.internal.cache.Oplog.createKrf(Oplog.java:4201) <======= already in createKrf > - locked <0x00000000f1a72c30> (a java.util.concurrent.atomic.AtomicBoolean) > at org.apache.geode.internal.cache.Oplog$2.run(Oplog.java:3875) > at org.apache.geode.internal.cache.DiskStoreImpl$5.run(DiskStoreImpl.java:4386) > at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > The DiskEntry lock is held by this thread and is waiting for replies: > "PartitionedRegion Message Processor26" #238 daemon prio=10 os_prio=0 tid=0x00007fef9c01f800 nid=0x3dd0 waiting on condition [0x00007ff08ebe9000] > java.lang.Thread.State: TIMED_WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000000f1e42200> (a java.util.concurrent.CountDownLatch$Sync) > at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) > at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(AbstractQueuedSynchronizer.java:1037) 
> at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(AbstractQueuedSynchronizer.java:1328) > at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:277) > at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:64) > at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:718) > at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:795) > at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:771) > at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:858) > at org.apache.geode.internal.cache.DistributedCacheOperation.waitForAckIfNeeded(DistributedCacheOperation.java:761) > at org.apache.geode.internal.cache.DistributedCacheOperation._distribute(DistributedCacheOperation.java:658) > at org.apache.geode.internal.cache.DistributedCacheOperation.startOperation(DistributedCacheOperation.java:264) > at org.apache.geode.internal.cache.BucketRegion.basicPutPart2(BucketRegion.java:665) > at org.apache.geode.internal.cache.AbstractRegionMap.basicPut(AbstractRegionMap.java:2839) > - locked <0x00000000f1c55c70> (a org.apache.geode.internal.cache.entries.VersionedThinDiskRegionEntryOffHeapStringKey2) <======= has the DiskEntryLock > at org.apache.geode.internal.cache.BucketRegion.virtualPut(BucketRegion.java:502) > at org.apache.geode.internal.cache.PartitionedRegionDataStore.putLocally(PartitionedRegionDataStore.java:1222) > at org.apache.geode.internal.cache.PartitionedRegionDataStore.putLocally(PartitionedRegionDataStore.java:1205) > at org.apache.geode.internal.cache.PartitionedRegionDataView.putEntryOnRemote(PartitionedRegionDataView.java:99) > at org.apache.geode.internal.cache.partitioned.PutMessage.operateOnPartitionedRegion(PutMessage.java:744) > at 
org.apache.geode.internal.cache.partitioned.PartitionMessage.process(PartitionMessage.java:333) > at org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:374) > at org.apache.geode.distributed.internal.DistributionMessage$1.run(DistributionMessage.java:440) > at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at org.apache.geode.distributed.internal.DistributionManager.runUntilShutdown(DistributionManager.java:666) > at org.apache.geode.distributed.internal.DistributionManager$8$1.run(DistributionManager.java:1069) > at java.lang.Thread.run(Thread.java:748) > The member replying can't because it's waiting for the backup lock. > "P2P message reader for rs-CommunicationsBTTest-2017-10-27-08-42-54-client-8(gemfire3_rs-CommunicationsBTTest-2017-10-27-08-42-54-client-8_14506:14506):1028 shared ordered uid=6 port=40404" #152 daemon prio=10 os_prio=0 tid=0x00007ff91800d800 nid=0x3bf3 waiting on condition [0x00007ffa29d4a000] > java.lang.Thread.State: WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000000f0b63ad0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) > at org.apache.geode.internal.cache.BackupLock.lock(BackupLock.java:95) > at org.apache.geode.internal.cache.Oplog.basicModify(Oplog.java:4612) > at org.apache.geode.internal.cache.Oplog.modify(Oplog.java:4450) > at org.apache.geode.internal.cache.PersistentOplogSet.modify(PersistentOplogSet.java:188) > at org.apache.geode.internal.cache.DiskStoreImpl.put(DiskStoreImpl.java:730) > at org.apache.geode.internal.cache.DiskRegion.put(DiskRegion.java:351) > at 
org.apache.geode.internal.cache.entries.DiskEntry$Helper.writeBytesToDisk(DiskEntry.java:828) > at org.apache.geode.internal.cache.entries.DiskEntry$Helper.basicUpdate(DiskEntry.java:932) > at org.apache.geode.internal.cache.entries.DiskEntry$Helper.update(DiskEntry.java:859) > - locked <0x00000000f1610f70> (a org.apache.geode.internal.cache.DiskId$PersistenceWithIntOffset) > at org.apache.geode.internal.cache.entries.AbstractDiskRegionEntry.setValue(AbstractDiskRegionEntry.java:40) > at org.apache.geode.internal.cache.entries.AbstractRegionEntry.setValueWithTombstoneCheck(AbstractRegionEntry.java:307) > at org.apache.geode.internal.cache.EntryEventImpl.setNewValueInRegion(EntryEventImpl.java:1651) > at org.apache.geode.internal.cache.EntryEventImpl.putExistingEntry(EntryEventImpl.java:1527) > at org.apache.geode.internal.cache.AbstractRegionMap.updateEntry(AbstractRegionMap.java:2993) > at org.apache.geode.internal.cache.AbstractRegionMap.basicPut(AbstractRegionMap.java:2816) > - locked <0x00000000f1610f28> (a org.apache.geode.internal.cache.entries.VersionedThinDiskRegionEntryOffHeapStringKey2) > at org.apache.geode.internal.cache.BucketRegion.virtualPut(BucketRegion.java:502) > at org.apache.geode.internal.cache.LocalRegionDataView.putEntry(LocalRegionDataView.java:152) > at org.apache.geode.internal.cache.LocalRegion.basicUpdate(LocalRegion.java:5584) > at org.apache.geode.internal.cache.AbstractUpdateOperation.doPutOrCreate(AbstractUpdateOperation.java:165) > at org.apache.geode.internal.cache.AbstractUpdateOperation$AbstractUpdateMessage.basicOperateOnRegion(AbstractUpdateOperation.java:272) > at org.apache.geode.internal.cache.AbstractUpdateOperation$AbstractUpdateMessage.operateOnRegion(AbstractUpdateOperation.java:243) > at org.apache.geode.internal.cache.DistributedCacheOperation$CacheOperationMessage.basicProcess(DistributedCacheOperation.java:1190) > at 
org.apache.geode.internal.cache.DistributedCacheOperation$CacheOperationMessage.process(DistributedCacheOperation.java:1091) > at org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:374) > at org.apache.geode.distributed.internal.DistributionMessage.schedule(DistributionMessage.java:432) > at org.apache.geode.distributed.internal.DistributionManager.scheduleIncomingMessage(DistributionManager.java:3552) > at org.apache.geode.distributed.internal.DistributionManager.handleIncomingDMsg(DistributionManager.java:3186) > at org.apache.geode.distributed.internal.DistributionManager$MyListener.messageReceived(DistributionManager.java:4361) > at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.dispatchMessage(GMSMembershipManager.java:1127) > at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.handleOrDeferMessage(GMSMembershipManager.java:1045) > at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$MyDCReceiver.messageReceived(GMSMembershipManager.java:408) > at org.apache.geode.distributed.internal.direct.DirectChannel.receive(DirectChannel.java:714) > at org.apache.geode.internal.tcp.TCPConduit.messageReceived(TCPConduit.java:874) > at org.apache.geode.internal.tcp.Connection.dispatchMessage(Connection.java:3966) > at org.apache.geode.internal.tcp.Connection.processNIOBuffer(Connection.java:3552) > at org.apache.geode.internal.tcp.Connection.runNioReader(Connection.java:1828) > at org.apache.geode.internal.tcp.Connection.run(Connection.java:1689) > at java.lang.Thread.run(Thread.java:748) > Note that the replying member is also the member that initiated the backup: > "vm_4_thr_43_client5_rs-CommunicationsBTTest-2017-10-27-08-42-54-client-8_14524" #480 daemon prio=5 os_prio=0 tid=0x00007ff99c0b2800 nid=0x6911 waiting on condition [0x00007ffa208cf000] > java.lang.Thread.State: TIMED_WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > 
- parking to wait for <0x00000000f214ef08> (a java.util.concurrent.CountDownLatch$Sync) > at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) > at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(AbstractQueuedSynchronizer.java:1037) > at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(AbstractQueuedSynchronizer.java:1328) > at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:277) > at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:77) > at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:694) > at org.apache.geode.distributed.internal.ReplyProcessor21.waitForReplies(ReplyProcessor21.java:644) > at org.apache.geode.distributed.internal.ReplyProcessor21.waitForReplies(ReplyProcessor21.java:624) > at org.apache.geode.distributed.internal.ReplyProcessor21.waitForReplies(ReplyProcessor21.java:519) > at org.apache.geode.admin.internal.FinishBackupRequest.send(FinishBackupRequest.java:80) > at org.apache.geode.admin.internal.BackupDataStoreHelper.backupAllMembers(BackupDataStoreHelper.java:47) > at org.apache.geode.internal.cache.BackupUtil.backupAllMembers(BackupUtil.java:50) > at org.apache.geode.admin.internal.AdminDistributedSystemImpl.backupAllMembers(AdminDistributedSystemImpl.java:2315) > at org.apache.geode.admin.internal.AdminDistributedSystemImpl.backupAllMembers(AdminDistributedSystemImpl.java:2310) > at org.apache.geode.admin.internal.AdminDistributedSystemImpl.backupAllMembers(AdminDistributedSystemImpl.java:2299) > at util.PersistenceUtil.doOnlineBackup(PersistenceUtil.java:627) > at parReg.ParRegTest.doConcOpsAndVerify(ParRegTest.java:1822) > at parReg.ParRegTest.HydraTask_doConcOpsAndVerify(ParRegTest.java:1011) > at sun.reflect.GeneratedMethodAccessor401.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at 
java.lang.reflect.Method.invoke(Method.java:498) > at hydra.MethExecutor.execute(MethExecutor.java:182) > at hydra.MethExecutor.execute(MethExecutor.java:150) > at hydra.TestTask.execute(TestTask.java:191) > at hydra.RemoteTestModule$1.run(RemoteTestModule.java:212) > The replying member is still holding the backup lock because it waits for all other members to reply to the FinishBackupRequest message > before it processes its backup and releases the lock: > From FinishBackupRequest.java > public static Map<DistributedMember, Set<PersistentID>> send(DM dm, Set recipients, > File targetDir, File baselineDir, boolean abort) { > FinishBackupRequest request = new FinishBackupRequest(targetDir, baselineDir, abort); > request.setRecipients(recipients); > FinishBackupReplyProcessor replyProcessor = new FinishBackupReplyProcessor(dm, recipients); > request.msgId = replyProcessor.getProcessorId(); > dm.putOutgoing(request); > try { > replyProcessor.waitForReplies(); <======= wait for all other members (but they won't finish until this member does its work; this is where we are stuck) > } catch (ReplyException e) { > if (!(e.getCause() instanceof CancelException)) { > throw e; > } > } catch (InterruptedException e) { > e.printStackTrace(); > } > AdminResponse response = request.createResponse((DistributionManager) dm); <====== this and the following lines do this member's work > response.setSender(dm.getDistributionManagerId()); > replyProcessor.process(response); > return replyProcessor.results; > } > {noformat} -- This message was sent by Atlassian JIRA (v6.4.14#64029)