trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From se...@apache.org
Subject [04/12] incubator-trafodion git commit: Fix to handle bad TM ZK node entries during recovery
Date Fri, 30 Sep 2016 21:05:51 GMT
Fix to handle bad TM ZK node entries during recovery


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/b352dd8f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/b352dd8f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/b352dd8f

Branch: refs/heads/master
Commit: b352dd8f1fdab8f73bf2f4dea3b0ac757dd6ad29
Parents: d7232d3 fd4fadf
Author: Sean Broeder <sbroeder@edev06.esgyn.local>
Authored: Wed Sep 28 23:20:57 2016 +0000
Committer: Sean Broeder <sbroeder@edev06.esgyn.local>
Committed: Thu Sep 29 10:29:22 2016 +0000

----------------------------------------------------------------------
 .../transactional/TransactionManager.java       | 15 ++----
 .../java/org/trafodion/dtm/HBaseTxClient.java   | 50 ++++++--------------
 2 files changed, 18 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b352dd8f/core/sqf/src/seatrans/hbase-trx/src/main/java/org/apache/hadoop/hbase/client/transactional/TransactionManager.java
----------------------------------------------------------------------
diff --cc core/sqf/src/seatrans/hbase-trx/src/main/java/org/apache/hadoop/hbase/client/transactional/TransactionManager.java
index 86b93a1,7ce8184..caf103f
--- a/core/sqf/src/seatrans/hbase-trx/src/main/java/org/apache/hadoop/hbase/client/transactional/TransactionManager.java
+++ b/core/sqf/src/seatrans/hbase-trx/src/main/java/org/apache/hadoop/hbase/client/transactional/TransactionManager.java
@@@ -2968,7 -2968,7 +2968,8 @@@ public class TransactionManager 
       * @return
       * @throws Exception
       */
--    public List<Long> recoveryRequest (String hostnamePort, byte[] regionArray, int
tmid) throws DeserializationException, IOException {
++    public List<Long> recoveryRequest (String hostnamePort, byte[] regionArray, int
tmid) throws DeserializationException, 
++           ServiceException, IOException, Throwable {
          if (LOG.isTraceEnabled()) LOG.trace("recoveryRequest -- ENTRY TM" + tmid);
          HRegionInfo regionInfo = null;
          HTable table = null;
@@@ -3006,17 -3006,17 +3007,7 @@@
              table = new HTable(regionInfo.getTable(), connection, cp_tpe);
  
              Map<byte[], RecoveryRequestResponse> rresult = null;
--            try {
--              rresult = table.coprocessorService(TrxRegionService.class, startKey, endKey,
callable);
--            }
--            catch (ServiceException se) {
-                 LOG.error("Service exception thrown when recoveryRequest: ", se);
-                 throw new IOException("Service exception thrown when recoveryRequest:",
se);
 -                LOG.error("Service exception thrown when calling recoveryRequest: ", se);
 -                throw new IOException("Service exception thrown when calling recoveryRequest:",
se);
--            }
--            catch (Throwable t) {
-                 LOG.error("Exception thrown when recoveryRequest: ", t);
-                 throw new IOException("Exception thrown when recoveryRequest: ", t);
 -                LOG.error("Exception thrown when calling recoveryRequest: ", t);
 -                throw new IOException("Exception thrown when calling recoveryRequest: ",
t);
--            }
++            rresult = table.coprocessorService(TrxRegionService.class, startKey, endKey,
callable);
  
          Collection<RecoveryRequestResponse> results = rresult.values();
          RecoveryRequestResponse[] resultArray = new RecoveryRequestResponse[results.size()];

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b352dd8f/core/sqf/src/seatrans/tm/hbasetmlib2/src/main/java/org/trafodion/dtm/HBaseTxClient.java
----------------------------------------------------------------------
diff --cc core/sqf/src/seatrans/tm/hbasetmlib2/src/main/java/org/trafodion/dtm/HBaseTxClient.java
index ca61b56,ca61b56..0389c02
--- a/core/sqf/src/seatrans/tm/hbasetmlib2/src/main/java/org/trafodion/dtm/HBaseTxClient.java
+++ b/core/sqf/src/seatrans/tm/hbasetmlib2/src/main/java/org/trafodion/dtm/HBaseTxClient.java
@@@ -1034,42 -1034,42 +1034,22 @@@ public class HBaseTxClient 
                                  }
                                  try {
                                      TxRecoverList = txnManager.recoveryRequest(hostnamePort,
regionBytes, tmID);
--                                }catch (NotServingRegionException e) {
--                                   TxRecoverList = null;
--                                   LOG.error("TRAF RCOV THREAD:NotServingRegionException
calling recoveryRequest. regionBytes: " + new String(regionBytes) +
--                                             " TM: " + tmID + " hostnamePort: " + hostnamePort,
e);
--
--                                      // First delete the zookeeper entry
--                                      LOG.error("TRAF RCOV THREAD:recoveryRequest. Deleting
region entry Entry: " + regionEntry);
--                                      zookeeper.deleteRegionEntry(regionEntry);
--                                      // Create a local HTable object using the regionInfo
--                                      HTable table = new HTable(config, HRegionInfo.parseFrom(regionBytes).getTable().getNameAsString());
--                                      // Repost a zookeeper entry for all current regions
in the table
--                                      zookeeper.postAllRegionEntries(table);
--                                }// NotServingRegionException
--                                catch (TableNotFoundException tnfe) {
--                                   // In this case there is nothing to recover.  We just
need to delete the region entry.
--                                      // First delete the zookeeper entry
--                                      LOG.warn("TRAF RCOV THREAD:TableNotFoundException
calling txnManager.recoveryRequest. " + "TM: " +
++                                catch (Exception e) {
++                                   // For all cases of Exception, we rely on the region
to redrive the request.
++                                   // Likely there is nothing to recover, due to a stale
region entry, but it is always safe to redrive.
++                                   // We log a warning event and delete the ZKNode entry.
++                                   LOG.warn("TRAF RCOV THREAD:TableNotFoundException calling
txnManager.recoveryRequest. " + "TM: " +
                                                tmID + " regionBytes: [" + regionBytes + "].
 Deleting zookeeper region entry. \n exception: " + tnfe);
--                                      zookeeper.deleteRegionEntry(regionEntry);
--
--                                }// TableNotFoundException
--                                catch (DeserializationException de) {
--                                   // We are unable to parse the region info from ZooKeeper.
 We just need to delete the region entry.
--                                      // First delete the zookeeper entry
--                                      LOG.warn("TRAF RCOV THREAD:DeserializationException
calling txnManager.recoveryRequest. " + "TM: " +
--                                              tmID + " regionBytes: [" + regionBytes + "].
 Deleting zookeeper region entry. \n exception: " + de);
--                                      zookeeper.deleteRegionEntry(regionEntry);
--                                }// DeserializationException
--                                catch (IOException ioe) {
--                                    // It's possible we received a spurious entry from a
stale region.
--                                       // It's safe to delete the zookeeper entry and wait
for any new posts
--                                       LOG.warn("TRAF RCOV THREAD:IOException calling txnManager.recoveryRequest.
" + "TM: " +
--                                               tmID + " regionBytes: [" + regionBytes
--                                               + "].  Deleting zookeeper region entry. exception:
" + ioe);
--                                       zookeeper.deleteRegionEntry(regionEntry);
--                                 }// IOException
++                                   zookeeper.deleteRegionEntry(regionEntry);
++
++                                   // In the case of NotServingRegionException we will repost
the ZKNode after refreshing the table.
++                                   if (e instanceOf NotServingRegionException){
++                                       // Create a local HTable object using the regionInfo
++                                       HTable table = new HTable(config, HRegionInfo.parseFrom(regionBytes).getTable().getNameAsString());
++                                       // Repost a zookeeper entry for all current regions
in the table
++                                       zookeeper.postAllRegionEntries(table);
++                                   }
++                                } // Exception
  
                                  if (TxRecoverList != null) {
                                      if (LOG.isDebugEnabled()) LOG.trace("TRAF RCOV THREAD:size
of TxRecoverList " + TxRecoverList.size());


Mime
View raw message