From: busbey@apache.org
To: commits@hbase.apache.org
Reply-To: dev@hbase.apache.org
Date: Thu, 26 Nov 2015 04:29:51 -0000
In-Reply-To: <3bdc660350bc4ea49fdaa83a55939a7b@git.apache.org>
References: <3bdc660350bc4ea49fdaa83a55939a7b@git.apache.org>
Subject: [26/51] [partial] hbase git commit: Published site at e73a9594c218ed969a2f5b0b356d7b8d0e1474c0.

http://git-wip-us.apache.org/repos/asf/hbase/blob/a986dfe6/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegion.RowLockImpl.html
----------------------------------------------------------------------
diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegion.RowLockImpl.html b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegion.RowLockImpl.html
index 88901b0..fe65cea 100644
--- a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegion.RowLockImpl.html
+++ b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegion.RowLockImpl.html
@@ -2982,5132 +2982,5131 @@
 2974 }
 2975 if (rowLock == null) {
 2976 // We failed to grab another lock
-2977 assert false: "Should never fail to get lock when blocking";
-2978 break; // stop acquiring more rows for this batch
-2979 } else {
-2980 acquiredRowLocks.add(rowLock);
-2981 }
-2982
-2983 lastIndexExclusive++;
-2984 numReadyToWrite++;
-2985
-2986 if (isPutMutation) {
-2987 // If Column Families stay consistent through out all of the
-2988 // individual puts then metrics can be reported as a mutliput across
-2989 // column families in the first put.
-2990 if (putsCfSet == null) { -2991 putsCfSet = mutation.getFamilyCellMap().keySet(); -2992 } else { -2993 putsCfSetConsistent = putsCfSetConsistent -2994 && mutation.getFamilyCellMap().keySet().equals(putsCfSet); -2995 } -2996 } else { -2997 if (deletesCfSet == null) { -2998 deletesCfSet = mutation.getFamilyCellMap().keySet(); -2999 } else { -3000 deletesCfSetConsistent = deletesCfSetConsistent -3001 && mutation.getFamilyCellMap().keySet().equals(deletesCfSet); -3002 } -3003 } -3004 } -3005 -3006 // we should record the timestamp only after we have acquired the rowLock, -3007 // otherwise, newer puts/deletes are not guaranteed to have a newer timestamp -3008 now = EnvironmentEdgeManager.currentTime(); -3009 byte[] byteNow = Bytes.toBytes(now); -3010 -3011 // Nothing to put/delete -- an exception in the above such as NoSuchColumnFamily? -3012 if (numReadyToWrite <= 0) return 0L; -3013 -3014 // We've now grabbed as many mutations off the list as we can -3015 -3016 // ------------------------------------ -3017 // STEP 2. Update any LATEST_TIMESTAMP timestamps -3018 // ---------------------------------- -3019 for (int i = firstIndex; !isInReplay && i < lastIndexExclusive; i++) { -3020 // skip invalid -3021 if (batchOp.retCodeDetails[i].getOperationStatusCode() -3022 != OperationStatusCode.NOT_RUN) continue; -3023 -3024 Mutation mutation = batchOp.getMutation(i); -3025 if (mutation instanceof Put) { -3026 updateCellTimestamps(familyMaps[i].values(), byteNow); -3027 noOfPuts++; -3028 } else { -3029 prepareDeleteTimestamps(mutation, familyMaps[i], byteNow); -3030 noOfDeletes++; -3031 } -3032 rewriteCellTags(familyMaps[i], mutation); -3033 } -3034 -3035 lock(this.updatesLock.readLock(), numReadyToWrite); -3036 locked = true; -3037 -3038 // calling the pre CP hook for batch mutation -3039 if (!isInReplay && coprocessorHost != null) { -3040 MiniBatchOperationInProgress<Mutation> miniBatchOp = -3041 new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(), -3042 batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive); -3043 if (coprocessorHost.preBatchMutate(miniBatchOp)) return 0L; -3044 } -3045 -3046 // ------------------------------------ -3047 // STEP 3. Build WAL edit -3048 // ---------------------------------- -3049 Durability durability = Durability.USE_DEFAULT; -3050 for (int i = firstIndex; i < lastIndexExclusive; i++) { -3051 // Skip puts that were determined to be invalid during preprocessing -3052 if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.NOT_RUN) { -3053 continue; -3054 } -3055 -3056 Mutation m = batchOp.getMutation(i); -3057 Durability tmpDur = getEffectiveDurability(m.getDurability()); -3058 if (tmpDur.ordinal() > durability.ordinal()) { -3059 durability = tmpDur; -3060 } -3061 if (tmpDur == Durability.SKIP_WAL) { -3062 recordMutationWithoutWal(m.getFamilyCellMap()); -3063 continue; -3064 } -3065 -3066 long nonceGroup = batchOp.getNonceGroup(i), nonce = batchOp.getNonce(i); -3067 // In replay, the batch may contain multiple nonces. If so, write WALEdit for each. -3068 // Given how nonces are originally written, these should be contiguous. -3069 // They don't have to be, it will still work, just write more WALEdits than needed. 
-3070 if (nonceGroup != currentNonceGroup || nonce != currentNonce) { -3071 if (walEdit.size() > 0) { -3072 assert isInReplay; -3073 if (!isInReplay) { -3074 throw new IOException("Multiple nonces per batch and not in replay"); -3075 } -3076 // txid should always increase, so having the one from the last call is ok. -3077 // we use HLogKey here instead of WALKey directly to support legacy coprocessors. -3078 walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(), -3079 this.htableDescriptor.getTableName(), now, m.getClusterIds(), -3080 currentNonceGroup, currentNonce, mvcc); -3081 txid = this.wal.append(this.htableDescriptor, this.getRegionInfo(), walKey, -3082 walEdit, true); -3083 walEdit = new WALEdit(isInReplay); -3084 walKey = null; -3085 } -3086 currentNonceGroup = nonceGroup; -3087 currentNonce = nonce; -3088 } -3089 -3090 // Add WAL edits by CP -3091 WALEdit fromCP = batchOp.walEditsFromCoprocessors[i]; -3092 if (fromCP != null) { -3093 for (Cell cell : fromCP.getCells()) { -3094 walEdit.add(cell); -3095 } -3096 } -3097 addFamilyMapToWALEdit(familyMaps[i], walEdit); -3098 } -3099 -3100 // ------------------------- -3101 // STEP 4. Append the final edit to WAL. Do not sync wal. -3102 // ------------------------- -3103 Mutation mutation = batchOp.getMutation(firstIndex); -3104 if (isInReplay) { -3105 // use wal key from the original -3106 walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(), -3107 this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, -3108 mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc); -3109 long replaySeqId = batchOp.getReplaySequenceId(); -3110 walKey.setOrigLogSeqNum(replaySeqId); -3111 } -3112 if (walEdit.size() > 0) { -3113 if (!isInReplay) { -3114 // we use HLogKey here instead of WALKey directly to support legacy coprocessors. -3115 walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(), -3116 this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now, -3117 mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc); -3118 } -3119 txid = this.wal.append(this.htableDescriptor, this.getRegionInfo(), walKey, walEdit, true); -3120 } -3121 // ------------------------------------ -3122 // Acquire the latest mvcc number -3123 // ---------------------------------- -3124 if (walKey == null) { -3125 // If this is a skip wal operation just get the read point from mvcc -3126 walKey = this.appendEmptyEdit(this.wal); -3127 } -3128 if (!isInReplay) { -3129 writeEntry = walKey.getWriteEntry(); -3130 mvccNum = writeEntry.getWriteNumber(); -3131 } else { -3132 mvccNum = batchOp.getReplaySequenceId(); -3133 } -3134 -3135 // ------------------------------------ -3136 // STEP 5. Write back to memstore -3137 // Write to memstore. It is ok to write to memstore -3138 // first without syncing the WAL because we do not roll -3139 // forward the memstore MVCC. The MVCC will be moved up when -3140 // the complete operation is done. These changes are not yet -3141 // visible to scanners till we update the MVCC. The MVCC is -3142 // moved only when the sync is complete. 
-3143 // ---------------------------------- -3144 long addedSize = 0; -3145 for (int i = firstIndex; i < lastIndexExclusive; i++) { -3146 if (batchOp.retCodeDetails[i].getOperationStatusCode() -3147 != OperationStatusCode.NOT_RUN) { -3148 continue; -3149 } -3150 doRollBackMemstore = true; // If we have a failure, we need to clean what we wrote -3151 addedSize += applyFamilyMapToMemstore(familyMaps[i], mvccNum, isInReplay); -3152 } -3153 -3154 // ------------------------------- -3155 // STEP 6. Release row locks, etc. -3156 // ------------------------------- -3157 if (locked) { -3158 this.updatesLock.readLock().unlock(); -3159 locked = false; -3160 } -3161 releaseRowLocks(acquiredRowLocks); -3162 -3163 // ------------------------- -3164 // STEP 7. Sync wal. -3165 // ------------------------- -3166 if (txid != 0) { -3167 syncOrDefer(txid, durability); -3168 } -3169 -3170 doRollBackMemstore = false; -3171 // calling the post CP hook for batch mutation -3172 if (!isInReplay && coprocessorHost != null) { -3173 MiniBatchOperationInProgress<Mutation> miniBatchOp = -3174 new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(), -3175 batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive); -3176 coprocessorHost.postBatchMutate(miniBatchOp); -3177 } -3178 -3179 // ------------------------------------------------------------------ -3180 // STEP 8. Advance mvcc. This will make this put visible to scanners and getters. -3181 // ------------------------------------------------------------------ -3182 if (writeEntry != null) { -3183 mvcc.completeAndWait(writeEntry); -3184 writeEntry = null; -3185 } else if (isInReplay) { -3186 // ensure that the sequence id of the region is at least as big as orig log seq id -3187 mvcc.advanceTo(mvccNum); -3188 } -3189 -3190 for (int i = firstIndex; i < lastIndexExclusive; i ++) { -3191 if (batchOp.retCodeDetails[i] == OperationStatus.NOT_RUN) { -3192 batchOp.retCodeDetails[i] = OperationStatus.SUCCESS; -3193 } -3194 } -3195 -3196 // ------------------------------------ -3197 // STEP 9. Run coprocessor post hooks. This should be done after the wal is -3198 // synced so that the coprocessor contract is adhered to. -3199 // ------------------------------------ -3200 if (!isInReplay && coprocessorHost != null) { -3201 for (int i = firstIndex; i < lastIndexExclusive; i++) { -3202 // only for successful puts -3203 if (batchOp.retCodeDetails[i].getOperationStatusCode() -3204 != OperationStatusCode.SUCCESS) { -3205 continue; -3206 } -3207 Mutation m = batchOp.getMutation(i); -3208 if (m instanceof Put) { -3209 coprocessorHost.postPut((Put) m, walEdit, m.getDurability()); -3210 } else { -3211 coprocessorHost.postDelete((Delete) m, walEdit, m.getDurability()); -3212 } -3213 } -3214 } -3215 -3216 success = true; -3217 return addedSize; -3218 } finally { -3219 // if the wal sync was unsuccessful, remove keys from memstore -3220 if (doRollBackMemstore) { -3221 for (int j = 0; j < familyMaps.length; j++) { -3222 for(List<Cell> cells:familyMaps[j].values()) { -3223 rollbackMemstore(cells); -3224 } -3225 } -3226 if (writeEntry != null) mvcc.complete(writeEntry); -3227 } else if (writeEntry != null) { -3228 mvcc.completeAndWait(writeEntry); -3229 } -3230 -3231 if (locked) { -3232 this.updatesLock.readLock().unlock(); -3233 } -3234 releaseRowLocks(acquiredRowLocks); -3235 -3236 // See if the column families were consistent through the whole thing. -3237 // if they were then keep them. If they were not then pass a null. 
-3238 // null will be treated as unknown. -3239 // Total time taken might be involving Puts and Deletes. -3240 // Split the time for puts and deletes based on the total number of Puts and Deletes. -3241 -3242 if (noOfPuts > 0) { -3243 // There were some Puts in the batch. -3244 if (this.metricsRegion != null) { -3245 this.metricsRegion.updatePut(); -3246 } -3247 } -3248 if (noOfDeletes > 0) { -3249 // There were some Deletes in the batch. -3250 if (this.metricsRegion != null) { -3251 this.metricsRegion.updateDelete(); -3252 } -3253 } -3254 if (!success) { -3255 for (int i = firstIndex; i < lastIndexExclusive; i++) { -3256 if (batchOp.retCodeDetails[i].getOperationStatusCode() == OperationStatusCode.NOT_RUN) { -3257 batchOp.retCodeDetails[i] = OperationStatus.FAILURE; -3258 } -3259 } -3260 } -3261 if (coprocessorHost != null && !batchOp.isInReplay()) { -3262 // call the coprocessor hook to do any finalization steps -3263 // after the put is done -3264 MiniBatchOperationInProgress<Mutation> miniBatchOp = -3265 new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(), -3266 batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, -3267 lastIndexExclusive); -3268 coprocessorHost.postBatchMutateIndispensably(miniBatchOp, success); -3269 } -3270 -3271 batchOp.nextIndexToProcess = lastIndexExclusive; -3272 } -3273 } -3274 -3275 /** -3276 * Returns effective durability from the passed durability and -3277 * the table descriptor. -3278 */ -3279 protected Durability getEffectiveDurability(Durability d) { -3280 return d == Durability.USE_DEFAULT ? this.durability : d; -3281 } -3282 -3283 //TODO, Think that gets/puts and deletes should be refactored a bit so that -3284 //the getting of the lock happens before, so that you would just pass it into -3285 //the methods. 
So in the case of checkAndMutate you could just do lockRow, -3286 //get, put, unlockRow or something -3287 -3288 @Override -3289 public boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, -3290 CompareOp compareOp, ByteArrayComparable comparator, Mutation w, -3291 boolean writeToWAL) -3292 throws IOException{ -3293 checkReadOnly(); -3294 //TODO, add check for value length or maybe even better move this to the -3295 //client if this becomes a global setting -3296 checkResources(); -3297 boolean isPut = w instanceof Put; -3298 if (!isPut && !(w instanceof Delete)) -3299 throw new org.apache.hadoop.hbase.DoNotRetryIOException("Action must " + -3300 "be Put or Delete"); -3301 if (!Bytes.equals(row, w.getRow())) { -3302 throw new org.apache.hadoop.hbase.DoNotRetryIOException("Action's " + -3303 "getRow must match the passed row"); -3304 } -3305 -3306 startRegionOperation(); -3307 try { -3308 Get get = new Get(row); -3309 checkFamily(family); -3310 get.addColumn(family, qualifier); -3311 -3312 // Lock row - note that doBatchMutate will relock this row if called -3313 RowLock rowLock = getRowLock(get.getRow()); -3314 // wait for all previous transactions to complete (with lock held) -3315 mvcc.await(); -3316 try { -3317 if (this.getCoprocessorHost() != null) { -3318 Boolean processed = null; -3319 if (w instanceof Put) { -3320 processed = this.getCoprocessorHost().preCheckAndPutAfterRowLock(row, family, -3321 qualifier, compareOp, comparator, (Put) w); -3322 } else if (w instanceof Delete) { -3323 processed = this.getCoprocessorHost().preCheckAndDeleteAfterRowLock(row, family, -3324 qualifier, compareOp, comparator, (Delete) w); -3325 } -3326 if (processed != null) { -3327 return processed; -3328 } -3329 } -3330 List<Cell> result = get(get, false); -3331 -3332 boolean valueIsNull = comparator.getValue() == null || -3333 comparator.getValue().length == 0; -3334 boolean matches = false; -3335 long cellTs = 0; -3336 if (result.size() == 0 && valueIsNull) { -3337 matches = true; -3338 } else if (result.size() > 0 && result.get(0).getValueLength() == 0 && -3339 valueIsNull) { -3340 matches = true; -3341 cellTs = result.get(0).getTimestamp(); -3342 } else if (result.size() == 1 && !valueIsNull) { -3343 Cell kv = result.get(0); -3344 cellTs = kv.getTimestamp(); -3345 int compareResult = CellComparator.compareValue(kv, comparator); -3346 switch (compareOp) { -3347 case LESS: -3348 matches = compareResult < 0; -3349 break; -3350 case LESS_OR_EQUAL: -3351 matches = compareResult <= 0; -3352 break; -3353 case EQUAL: -3354 matches = compareResult == 0; -3355 break; -3356 case NOT_EQUAL: -3357 matches = compareResult != 0; -3358 break; -3359 case GREATER_OR_EQUAL: -3360 matches = compareResult >= 0; -3361 break; -3362 case GREATER: -3363 matches = compareResult > 0; -3364 break; -3365 default: -3366 throw new RuntimeException("Unknown Compare op " + compareOp.name()); -3367 } -3368 } -3369 //If matches put the new put or delete the new delete -3370 if (matches) { -3371 // We have acquired the row lock already. If the system clock is NOT monotonically -3372 // non-decreasing (see HBASE-14070) we should make sure that the mutation has a -3373 // larger timestamp than what was observed via Get. doBatchMutate already does this, but -3374 // there is no way to pass the cellTs. See HBASE-14054. 
-3375 long now = EnvironmentEdgeManager.currentTime(); -3376 long ts = Math.max(now, cellTs); // ensure write is not eclipsed -3377 byte[] byteTs = Bytes.toBytes(ts); -3378 -3379 if (w instanceof Put) { -3380 updateCellTimestamps(w.getFamilyCellMap().values(), byteTs); -3381 } -3382 // else delete is not needed since it already does a second get, and sets the timestamp -3383 // from get (see prepareDeleteTimestamps). -3384 -3385 // All edits for the given row (across all column families) must -3386 // happen atomically. -3387 doBatchMutate(w); -3388 this.checkAndMutateChecksPassed.increment(); -3389 return true; -3390 } -3391 this.checkAndMutateChecksFailed.increment(); -3392 return false; -3393 } finally { -3394 rowLock.release(); -3395 } -3396 } finally { -3397 closeRegionOperation(); -3398 } -3399 } -3400 -3401 //TODO, Think that gets/puts and deletes should be refactored a bit so that -3402 //the getting of the lock happens before, so that you would just pass it into -3403 //the methods. So in the case of checkAndMutate you could just do lockRow, -3404 //get, put, unlockRow or something -3405 -3406 @Override -3407 public boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, -3408 CompareOp compareOp, ByteArrayComparable comparator, RowMutations rm, -3409 boolean writeToWAL) throws IOException { -3410 checkReadOnly(); -3411 //TODO, add check for value length or maybe even better move this to the -3412 //client if this becomes a global setting -3413 checkResources(); -3414 -3415 startRegionOperation(); -3416 try { -3417 Get get = new Get(row); -3418 checkFamily(family); -3419 get.addColumn(family, qualifier); -3420 -3421 // Lock row - note that doBatchMutate will relock this row if called -3422 RowLock rowLock = getRowLock(get.getRow()); -3423 // wait for all previous transactions to complete (with lock held) -3424 mvcc.await(); -3425 try { -3426 List<Cell> result = get(get, false); -3427 -3428 boolean valueIsNull = comparator.getValue() == null || -3429 comparator.getValue().length == 0; -3430 boolean matches = false; -3431 long cellTs = 0; -3432 if (result.size() == 0 && valueIsNull) { -3433 matches = true; -3434 } else if (result.size() > 0 && result.get(0).getValueLength() == 0 && -3435 valueIsNull) { -3436 matches = true; -3437 cellTs = result.get(0).getTimestamp(); -3438 } else if (result.size() == 1 && !valueIsNull) { -3439 Cell kv = result.get(0); -3440 cellTs = kv.getTimestamp(); -3441 int compareResult = CellComparator.compareValue(kv, comparator); -3442 switch (compareOp) { -3443 case LESS: -3444 matches = compareResult < 0; -3445 break; -3446 case LESS_OR_EQUAL: -3447 matches = compareResult <= 0; -3448 break; -3449 case EQUAL: -3450 matches = compareResult == 0; -3451 break; -3452 case NOT_EQUAL: -3453 matches = compareResult != 0; -3454 break; -3455 case GREATER_OR_EQUAL: -3456 matches = compareResult >= 0; -3457 break; -3458 case GREATER: -3459 matches = compareResult > 0; -3460 break; -3461 default: -3462 throw new RuntimeException("Unknown Compare op " + compareOp.name()); -3463 } -3464 } -3465 //If matches put the new put or delete the new delete -3466 if (matches) { -3467 // We have acquired the row lock already. If the system clock is NOT monotonically -3468 // non-decreasing (see HBASE-14070) we should make sure that the mutation has a -3469 // larger timestamp than what was observed via Get. doBatchMutate already does this, but -3470 // there is no way to pass the cellTs. See HBASE-14054. 
-3471 long now = EnvironmentEdgeManager.currentTime(); -3472 long ts = Math.max(now, cellTs); // ensure write is not eclipsed -3473 byte[] byteTs = Bytes.toBytes(ts); -3474 -3475 for (Mutation w : rm.getMutations()) { -3476 if (w instanceof Put) { -3477 updateCellTimestamps(w.getFamilyCellMap().values(), byteTs); -3478 } -3479 // else delete is not needed since it already does a second get, and sets the timestamp -3480 // from get (see prepareDeleteTimestamps). -3481 } -3482 -3483 // All edits for the given row (across all column families) must -3484 // happen atomically. -3485 mutateRow(rm); -3486 this.checkAndMutateChecksPassed.increment(); -3487 return true; -3488 } -3489 this.checkAndMutateChecksFailed.increment(); -3490 return false; -3491 } finally { -3492 rowLock.release(); -3493 } -3494 } finally { -3495 closeRegionOperation(); -3496 } -3497 } -3498 -3499 private void doBatchMutate(Mutation mutation) throws IOException { -3500 // Currently this is only called for puts and deletes, so no nonces. -3501 OperationStatus[] batchMutate = this.batchMutate(new Mutation[]{mutation}); -3502 if (batchMutate[0].getOperationStatusCode().equals(OperationStatusCode.SANITY_CHECK_FAILURE)) { -3503 throw new FailedSanityCheckException(batchMutate[0].getExceptionMsg()); -3504 } else if (batchMutate[0].getOperationStatusCode().equals(OperationStatusCode.BAD_FAMILY)) { -3505 throw new NoSuchColumnFamilyException(batchMutate[0].getExceptionMsg()); -3506 } -3507 } -3508 -3509 /** -3510 * Complete taking the snapshot on the region. Writes the region info and adds references to the -3511 * working snapshot directory. -3512 * -3513 * TODO for api consistency, consider adding another version with no {@link ForeignExceptionSnare} -3514 * arg. (In the future other cancellable HRegion methods could eventually add a -3515 * {@link ForeignExceptionSnare}, or we could do something fancier). -3516 * -3517 * @param desc snapshot description object -3518 * @param exnSnare ForeignExceptionSnare that captures external exceptions in case we need to -3519 * bail out. This is allowed to be null and will just be ignored in that case. -3520 * @throws IOException if there is an external or internal error causing the snapshot to fail -3521 */ -3522 public void addRegionToSnapshot(SnapshotDescription desc, -3523 ForeignExceptionSnare exnSnare) throws IOException { -3524 Path rootDir = FSUtils.getRootDir(conf); -3525 Path snapshotDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir); -3526 -3527 SnapshotManifest manifest = SnapshotManifest.create(conf, getFilesystem(), -3528 snapshotDir, desc, exnSnare); -3529 manifest.addRegion(this); -3530 -3531 // The regionserver holding the first region of the table is responsible for taking the -3532 // manifest of the mob dir. -3533 if (!Bytes.equals(getRegionInfo().getStartKey(), HConstants.EMPTY_START_ROW)) -3534 return; -3535 -3536 // if any cf's have is mob enabled, add the "mob region" to the manifest. 
-3537 List<Store> stores = getStores(); -3538 for (Store store : stores) { -3539 boolean hasMobStore = store.getFamily().isMobEnabled(); -3540 if (hasMobStore) { -3541 // use the .mob as the start key and 0 as the regionid -3542 HRegionInfo mobRegionInfo = MobUtils.getMobRegionInfo(this.getTableDesc().getTableName()); -3543 mobRegionInfo.setOffline(true); -3544 manifest.addMobRegion(mobRegionInfo, this.getTableDesc().getColumnFamilies()); -3545 return; -3546 } -3547 } -3548 } -3549 -3550 @Override -3551 public void updateCellTimestamps(final Iterable<List<Cell>> cellItr, final byte[] now) -3552 throws IOException { -3553 for (List<Cell> cells: cellItr) { -3554 if (cells == null) continue; -3555 assert cells instanceof RandomAccess; -3556 int listSize = cells.size(); -3557 for (int i = 0; i < listSize; i++) { -3558 CellUtil.updateLatestStamp(cells.get(i), now, 0); -3559 } -3560 } -3561 } -3562 -3563 /** -3564 * Possibly rewrite incoming cell tags. -3565 */ -3566 void rewriteCellTags(Map<byte[], List<Cell>> familyMap, final Mutation m) { -3567 // Check if we have any work to do and early out otherwise -3568 // Update these checks as more logic is added here -3569 -3570 if (m.getTTL() == Long.MAX_VALUE) { -3571 return; -3572 } -3573 -3574 // From this point we know we have some work to do -3575 -3576 for (Map.Entry<byte[], List<Cell>> e: familyMap.entrySet()) { -3577 List<Cell> cells = e.getValue(); -3578 assert cells instanceof RandomAccess; -3579 int listSize = cells.size(); -3580 for (int i = 0; i < listSize; i++) { -3581 Cell cell = cells.get(i); -3582 List<Tag> newTags = new ArrayList<Tag>(); -3583 Iterator<Tag> tagIterator = CellUtil.tagsIterator(cell.getTagsArray(), -3584 cell.getTagsOffset(), cell.getTagsLength()); -3585 -3586 // Carry forward existing tags -3587 -3588 while (tagIterator.hasNext()) { -3589 -3590 // Add any filters or tag specific rewrites here -3591 -3592 newTags.add(tagIterator.next()); -3593 } -3594 -3595 // Cell TTL handling -3596 -3597 // Check again if we need to add a cell TTL because early out logic -3598 // above may change when there are more tag based features in core. -3599 if (m.getTTL() != Long.MAX_VALUE) { -3600 // Add a cell TTL tag -3601 newTags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(m.getTTL()))); -3602 } -3603 -3604 // Rewrite the cell with the updated set of tags -3605 cells.set(i, new TagRewriteCell(cell, Tag.fromList(newTags))); -3606 } -3607 } -3608 } -3609 -3610 /* -3611 * Check if resources to support an update. -3612 * -3613 * We throw RegionTooBusyException if above memstore limit -3614 * and expect client to retry using some kind of backoff -3615 */ -3616 private void checkResources() throws RegionTooBusyException { -3617 // If catalog region, do not impose resource constraints or block updates. -3618 if (this.getRegionInfo().isMetaRegion()) return; -3619 -3620 if (this.memstoreSize.get() > this.blockingMemStoreSize) { -3621 blockedRequestsCount.increment(); -3622 requestFlush(); -3623 throw new RegionTooBusyException("Above memstore limit, " + -3624 "regionName=" + (this.getRegionInfo() == null ? "unknown" : -3625 this.getRegionInfo().getRegionNameAsString()) + -3626 ", server=" + (this.getRegionServerServices() == null ? "unknown" : -3627 this.getRegionServerServices().getServerName()) + -3628 ", memstoreSize=" + memstoreSize.get() + -3629 ", blockingMemStoreSize=" + blockingMemStoreSize); -3630 } -3631 } -3632 -3633 /** -3634 * @throws IOException Throws exception if region is in read-only mode. 
-3635 */ -3636 protected void checkReadOnly() throws IOException { -3637 if (isReadOnly()) { -3638 throw new DoNotRetryIOException("region is read only"); -3639 } -3640 } -3641 -3642 protected void checkReadsEnabled() throws IOException { -3643 if (!this.writestate.readsEnabled) { -3644 throw new IOException(getRegionInfo().getEncodedName() -3645 + ": The region's reads are disabled. Cannot serve the request"); -3646 } -3647 } -3648 -3649 public void setReadsEnabled(boolean readsEnabled) { -3650 if (readsEnabled && !this.writestate.readsEnabled) { -3651 LOG.info(getRegionInfo().getEncodedName() + " : Enabling reads for region."); -3652 } -3653 this.writestate.setReadsEnabled(readsEnabled); -3654 } -3655 -3656 /** -3657 * Add updates first to the wal and then add values to memstore. -3658 * Warning: Assumption is caller has lock on passed in row. -3659 * @param edits Cell updates by column -3660 * @throws IOException -3661 */ -3662 private void put(final byte [] row, byte [] family, List<Cell> edits) -3663 throws IOException { -3664 NavigableMap<byte[], List<Cell>> familyMap; -3665 familyMap = new TreeMap<byte[], List<Cell>>(Bytes.BYTES_COMPARATOR); -3666 -3667 familyMap.put(family, edits); -3668 Put p = new Put(row); -3669 p.setFamilyCellMap(familyMap); -3670 doBatchMutate(p); -3671 } -3672 -3673 /** -3674 * Atomically apply the given map of family->edits to the memstore. -3675 * This handles the consistency control on its own, but the caller -3676 * should already have locked updatesLock.readLock(). This also does -3677 * <b>not</b> check the families for validity. -3678 * -3679 * @param familyMap Map of kvs per family -3680 * @param mvccNum The MVCC for this transaction. -3681 * @param isInReplay true when adding replayed KVs into memstore -3682 * @return the additional memory usage of the memstore caused by the -3683 * new entries. -3684 */ -3685 private long applyFamilyMapToMemstore(Map<byte[], List<Cell>> familyMap, -3686 long mvccNum, boolean isInReplay) throws IOException { -3687 long size = 0; -3688 -3689 for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) { -3690 byte[] family = e.getKey(); -3691 List<Cell> cells = e.getValue(); -3692 assert cells instanceof RandomAccess; -3693 Store store = getStore(family); -3694 int listSize = cells.size(); -3695 for (int i=0; i < listSize; i++) { -3696 Cell cell = cells.get(i); -3697 if (cell.getSequenceId() == 0 || isInReplay) { -3698 CellUtil.setSequenceId(cell, mvccNum); -3699 } -3700 size += store.add(cell); -3701 } -3702 } -3703 -3704 return size; -3705 } -3706 -3707 /** -3708 * Remove all the keys listed in the map from the memstore. This method is -3709 * called when a Put/Delete has updated memstore but subsequently fails to update -3710 * the wal. This method is then invoked to rollback the memstore. 
-3711 */ -3712 private void rollbackMemstore(List<Cell> memstoreCells) { -3713 int kvsRolledback = 0; -3714 -3715 for (Cell cell : memstoreCells) { -3716 byte[] family = CellUtil.cloneFamily(cell); -3717 Store store = getStore(family); -3718 store.rollback(cell); -3719 kvsRolledback++; -3720 } -3721 LOG.debug("rollbackMemstore rolled back " + kvsRolledback); -3722 } -3723 -3724 @Override -3725 public void checkFamilies(Collection<byte[]> families) throws NoSuchColumnFamilyException { -3726 for (byte[] family : families) { -3727 checkFamily(family); -3728 } -3729 } -3730 -3731 /** -3732 * During replay, there could exist column families which are removed between region server -3733 * failure and replay -3734 */ -3735 private void removeNonExistentColumnFamilyForReplay( -3736 final Map<byte[], List<Cell>> familyMap) { -3737 List<byte[]> nonExistentList = null; -3738 for (byte[] family : familyMap.keySet()) { -3739 if (!this.htableDescriptor.hasFamily(family)) { -3740 if (nonExistentList == null) { -3741 nonExistentList = new ArrayList<byte[]>(); -3742 } -3743 nonExistentList.add(family); -3744 } -3745 } -3746 if (nonExistentList != null) { -3747 for (byte[] family : nonExistentList) { -3748 // Perhaps schema was changed between crash and replay -3749 LOG.info("No family for " + Bytes.toString(family) + " omit from reply."); -3750 familyMap.remove(family); -3751 } -3752 } -3753 } -3754 -3755 @Override -3756 public void checkTimestamps(final Map<byte[], List<Cell>> familyMap, long now) -3757 throws FailedSanityCheckException { -3758 if (timestampSlop == HConstants.LATEST_TIMESTAMP) { -3759 return; -3760 } -3761 long maxTs = now + timestampSlop; -3762 for (List<Cell> kvs : familyMap.values()) { -3763 assert kvs instanceof RandomAccess; -3764 int listSize = kvs.size(); -3765 for (int i=0; i < listSize; i++) { -3766 Cell cell = kvs.get(i); -3767 // see if the user-side TS is out of range. latest = server-side -3768 long ts = cell.getTimestamp(); -3769 if (ts != HConstants.LATEST_TIMESTAMP && ts > maxTs) { -3770 throw new FailedSanityCheckException("Timestamp for KV out of range " -3771 + cell + " (too.new=" + timestampSlop + ")"); -3772 } -3773 } -3774 } -3775 } -3776 -3777 /** -3778 * Append the given map of family->edits to a WALEdit data structure. -3779 * This does not write to the WAL itself. -3780 * @param familyMap map of family->edits -3781 * @param walEdit the destination entry to append into -3782 */ -3783 private void addFamilyMapToWALEdit(Map<byte[], List<Cell>> familyMap, -3784 WALEdit walEdit) { -3785 for (List<Cell> edits : familyMap.values()) { -3786 assert edits instanceof RandomAccess; -3787 int listSize = edits.size(); -3788 for (int i=0; i < listSize; i++) { -3789 Cell cell = edits.get(i); -3790 walEdit.add(cell); -3791 } -3792 } -3793 } -3794 -3795 private void requestFlush() { -3796 if (this.rsServices == null) { -3797 return; -3798 } -3799 synchronized (writestate) { -3800 if (this.writestate.isFlushRequested()) { -3801 return; -3802 } -3803 writestate.flushRequested = true; -3804 } -3805 // Make request outside of synchronize block; HBASE-818. 
-3806 this.rsServices.getFlushRequester().requestFlush(this, false); -3807 if (LOG.isDebugEnabled()) { -3808 LOG.debug("Flush requested on " + this.getRegionInfo().getEncodedName()); -3809 } -3810 } -3811 -3812 /* -3813 * @param size -3814 * @return True if size is over the flush threshold -3815 */ -3816 private boolean isFlushSize(final long size) { -3817 return size > this.memstoreFlushSize; -3818 } -3819 -3820 /** -3821 * Read the edits put under this region by wal splitting process. Put -3822 * the recovered edits back up into this region. -3823 * -3824 * <p>We can ignore any wal message that has a sequence ID that's equal to or -3825 * lower than minSeqId. (Because we know such messages are already -3826 * reflected in the HFiles.) -3827 * -3828 * <p>While this is running we are putting pressure on memory yet we are -3829 * outside of our usual accounting because we are not yet an onlined region -3830 * (this stuff is being run as part of Region initialization). This means -3831 * that if we're up against global memory limits, we'll not be flagged to flush -3832 * because we are not online. We can't be flushed by usual mechanisms anyways; -3833 * we're not yet online so our relative sequenceids are not yet aligned with -3834 * WAL sequenceids -- not till we come up online, post processing of split -3835 * edits. -3836 * -3837 * <p>But to help relieve memory pressure, at least manage our own heap size -3838 * flushing if are in excess of per-region limits. Flushing, though, we have -3839 * to be careful and avoid using the regionserver/wal sequenceid. Its running -3840 * on a different line to whats going on in here in this region context so if we -3841 * crashed replaying these edits, but in the midst had a flush that used the -3842 * regionserver wal with a s