From: mmccline@apache.org
To: commits@hive.apache.org
Date: Sat, 20 Aug 2016 08:41:30 -0000
Subject: [3/3] hive git commit: HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)

HIVE-13874: Tighten up EOF checking in Fast DeserializeRead classes; display better exception information; add new Unit Tests (Matt McCline, reviewed by Sergey Shelukhin)

Conflicts:
	serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/69ab3b27
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/69ab3b27
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/69ab3b27

Branch: refs/heads/branch-2.1
Commit: 69ab3b2734fb66badb4e68e6e4387fac5e61e016
Parents: ef83643
Author: Matt McCline
Authored: Sat Aug 20 01:32:45 2016 -0700
Committer: Matt McCline
Committed: Sat Aug 20 01:34:55 2016 -0700

----------------------------------------------------------------------
 .../persistence/BytesBytesMultiHashMap.java | 29 +- .../hive/ql/exec/tez/ReduceRecordSource.java | 10 +- .../ql/exec/vector/VectorDeserializeRow.java | 32 +- .../hive/ql/exec/vector/VectorMapOperator.java | 10 +- .../VectorMapJoinGenerateResultOperator.java | 64 +- .../fast/VectorMapJoinFastLongHashTable.java | 11 +- .../fast/VectorMapJoinFastStringCommon.java | 14 +- .../fast/VectorMapJoinFastValueStore.java | 56 +- .../hashtable/VectorMapJoinHashMapResult.java | 4 +- .../VectorMapJoinOptimizedHashMap.java | 10 +- .../persistence/TestBytesBytesMultiHashMap.java | 2 - .../ql/exec/vector/TestVectorRowObject.java | 3 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 12 +- .../ql/exec/vector/VectorRandomRowSource.java | 458 ++++++++++++ .../vector/mapjoin/fast/CheckFastHashTable.java | 19 +- .../mapjoin/fast/CheckFastRowHashMap.java
| 387 ++++++++++ .../fast/TestVectorMapJoinFastBytesHashMap.java | 15 + .../fast/TestVectorMapJoinFastRowHashMap.java | 718 +++++++++++++++++++ .../exec/vector/mapjoin/fast/VerifyFastRow.java | 397 ++++++++++ .../fast/BinarySortableDeserializeRead.java | 65 +- .../hive/serde2/fast/DeserializeRead.java | 6 +- .../hive/serde2/fast/RandomRowObjectSource.java | 423 ----------- .../hadoop/hive/serde2/fast/SerializeWrite.java | 2 +- .../lazy/fast/LazySimpleDeserializeRead.java | 62 +- .../fast/LazyBinaryDeserializeRead.java | 146 ++-- .../hive/serde2/SerdeRandomRowSource.java | 423 +++++++++++ .../binarysortable/TestBinarySortableFast.java | 21 +- .../hive/serde2/lazy/TestLazySimpleFast.java | 22 +- .../serde2/lazybinary/TestLazyBinaryFast.java | 28 +- 29 files changed, 2745 insertions(+), 704 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java index dd88461..6b89e98 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java @@ -298,8 +298,8 @@ public final class BytesBytesMultiHashMap { } /** - * Read the current value. - * + * Read the current value. + * * @return * The ByteSegmentRef to the current value read. */ @@ -380,29 +380,6 @@ public final class BytesBytesMultiHashMap { } /** - * @return Whether we have read all the values or not. - */ - public boolean isEof() { - // LOG.info("BytesBytesMultiHashMap isEof hasRows " + hasRows + " hasList " + hasList + " readIndex " + readIndex + " nextTailOffset " + nextTailOffset); - if (!hasRows) { - return true; - } - - if (!hasList) { - return (readIndex > 0); - } else { - // Multiple values. - if (readIndex <= 1) { - // Careful: We have not read the list record and 2nd value yet, so nextTailOffset - // is not valid yet. - return false; - } else { - return (nextTailOffset <= 0); - } - } - } - - /** * Lets go of any references to a hash map. 
*/ public void forget() { @@ -741,7 +718,7 @@ public final class BytesBytesMultiHashMap { long capacity = refs.length << 1; expandAndRehashImpl(capacity); } - + private void expandAndRehashImpl(long capacity) { long expandTime = System.currentTimeMillis(); final long[] oldRefs = refs; http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index e966ff1..f4c3b81 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; @@ -413,7 +414,14 @@ public class ReduceRecordSource implements RecordSource { // VectorizedBatchUtil.displayBytes(keyBytes, 0, keyLength)); keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength); - keyBinarySortableDeserializeToRow.deserialize(batch, 0); + try { + keyBinarySortableDeserializeToRow.deserialize(batch, 0); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead details: " + + keyBinarySortableDeserializeToRow.getDetailedReadPositionString(), + e); + } for(int i = 0; i < firstValueColumnOffset; i++) { VectorizedBatchUtil.setRepeatingColumn(batch, i); } http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 2e8331a..7fb79fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -644,38 +644,26 @@ public final class VectorDeserializeRow { /** * Deserialize a row from the range of bytes specified by setBytes. * + * Use getDetailedReadPositionString to get detailed read position information to help + * diagnose exceptions that are thrown... + * * @param batch * @param batchIndex * @throws IOException */ public void deserialize(VectorizedRowBatch batch, int batchIndex) throws IOException { final int count = isConvert.length; - int i = 0; - try { - while (i < count) { - if (isConvert[i]) { - deserializeConvertRowColumn(batch, batchIndex, i); - } else { - deserializeRowColumn(batch, batchIndex, i); - } - i++; // Increment after the apply which could throw an exception. 
+ for (int i = 0; i < count; i++) { + if (isConvert[i]) { + deserializeConvertRowColumn(batch, batchIndex, i); + } else { + deserializeRowColumn(batch, batchIndex, i); } - } catch (EOFException e) { - throwMoreDetailedException(e, i); } deserializeRead.extraFieldsCheck(); } - private void throwMoreDetailedException(IOException e, int index) throws EOFException { - StringBuilder sb = new StringBuilder(); - sb.append("Detail: \"" + e.toString() + "\" occured for field " + index + " of " + sourceTypeInfos.length + " fields ("); - for (int i = 0; i < sourceTypeInfos.length; i++) { - if (i > 0) { - sb.append(", "); - } - sb.append(((PrimitiveTypeInfo) sourceTypeInfos[i]).getPrimitiveCategory().name()); - } - sb.append(")"); - throw new EOFException(sb.toString()); + public String getDetailedReadPositionString() { + return deserializeRead.getDetailedReadPositionString(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java index 22f19f4..439dd37 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java @@ -808,7 +808,15 @@ public class VectorMapOperator extends AbstractMapOperator { currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength()); // Deserialize and append new row using the current batch size as the index. - currentVectorDeserializeRow.deserialize(deserializerBatch, deserializerBatch.size++); + try { + currentVectorDeserializeRow.deserialize( + deserializerBatch, deserializerBatch.size++); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + currentVectorDeserializeRow.getDetailedReadPositionString(), + e); + } } break; http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 22b2a17..469f86a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -134,6 +134,27 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC batch.selectedInUse = saveSelectedInUse; } + protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex, + ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult) + throws HiveException { + + byte[] bytes = byteSegmentRef.getBytes(); + int offset = (int) byteSegmentRef.getOffset(); + int length = byteSegmentRef.getLength(); + smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + + try { + smallTableVectorDeserializeRow.deserialize(batch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nHashMapResult detail: " + + hashMapResult.getDetailedHashMapResultPositionString() + + "\nDeserializeRead detail: " + + smallTableVectorDeserializeRow.getDetailedReadPositionString(), + 
e); + } + } + //------------------------------------------------------------------------------------------------ /* @@ -180,13 +201,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC } if (smallTableVectorDeserializeRow != null) { - - byte[] bytes = byteSegmentRef.getBytes(); - int offset = (int) byteSegmentRef.getOffset(); - int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); - - smallTableVectorDeserializeRow.deserialize(batch, batchIndex); + doSmallTableDeserializeRow(batch, batchIndex, + byteSegmentRef, hashMapResult); } // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table"); @@ -248,12 +264,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC if (smallTableVectorDeserializeRow != null) { - byte[] bytes = byteSegmentRef.getBytes(); - int offset = (int) byteSegmentRef.getOffset(); - int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); - - smallTableVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size); + doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); } // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow"); @@ -298,13 +310,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC while (byteSegmentRef != null) { if (smallTableVectorDeserializeRow != null) { - - byte[] bytes = byteSegmentRef.getBytes(); - int offset = (int) byteSegmentRef.getOffset(); - int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); - - smallTableVectorDeserializeRow.deserialize(overflowBatch, overflowBatch.size); + doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); } overflowBatch.size++; @@ -348,10 +355,10 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC } } - if (hashMapResult.isEof()) { + byteSegmentRef = hashMapResult.next(); + if (byteSegmentRef == null) { break; } - byteSegmentRef = hashMapResult.next(); // Get ready for a another round of small table values. 
overflowBatch.reset(); @@ -543,14 +550,18 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC int offset = bigTable.currentOffset(); int length = bigTable.currentLength(); -// LOG.debug(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length); - bigTableVectorDeserializeRow.setBytes(bytes, offset, length); - bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size); + try { + bigTableVectorDeserializeRow.deserialize(spillReplayBatch, spillReplayBatch.size); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + bigTableVectorDeserializeRow.getDetailedReadPositionString(), + e); + } spillReplayBatch.size++; if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { - // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows"); process(spillReplayBatch, posBigTable); // call process once we have a full batch spillReplayBatch.reset(); batchCount++; @@ -558,7 +569,6 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC } // Process the row batch that has less than DEFAULT_SIZE rows if (spillReplayBatch.size > 0) { - // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows"); process(spillReplayBatch, posBigTable); spillReplayBatch.reset(); batchCount++; http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index 5373aad..ee66d5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -78,8 +78,15 @@ public abstract class VectorMapJoinFastLongHashTable byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); - if (keyBinarySortableDeserializeRead.readCheckNull()) { - return; + try { + if (keyBinarySortableDeserializeRead.readCheckNull()) { + return; + } + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead details: " + + keyBinarySortableDeserializeRead.getDetailedReadPositionString() + + "\nException: " + e.toString()); } long key = VectorMapJoinFastLongHashUtil.deserializeLongKey( http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java index 985fb1c..bf378ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java @@ -45,8 +45,15 @@ public class VectorMapJoinFastStringCommon { byte[] keyBytes = currentKey.getBytes(); int keyLength 
= currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); - if (keyBinarySortableDeserializeRead.readCheckNull()) { - return; + try { + if (keyBinarySortableDeserializeRead.readCheckNull()) { + return; + } + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead details: " + + keyBinarySortableDeserializeRead.getDetailedReadPositionString() + + "\nException: " + e.toString()); } hashTable.add( @@ -59,6 +66,7 @@ public class VectorMapJoinFastStringCommon { public VectorMapJoinFastStringCommon(boolean isOuterJoin) { this.isOuterJoin = isOuterJoin; PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; - keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos); + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead(primitiveTypeInfos); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java index f96e32b..f9c5b34 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java @@ -122,9 +122,7 @@ public class VectorMapJoinFastValueStore { private boolean isSingleRow; private int cappedCount; - private boolean haveReadCurrent; private int readIndex; - private boolean isEof; private boolean isNextEof; private boolean isNextLast; @@ -153,9 +151,48 @@ public class VectorMapJoinFastValueStore { cappedCount = (int) ((valueRefWord & CappedCount.bitMask) >> CappedCount.bitShift); // Position to beginning. - haveReadCurrent = false; readIndex = 0; - isEof = false; + } + + /** + * Get detailed HashMap result position information to help diagnose exceptions. + */ + @Override + public String getDetailedHashMapResultPositionString() { + StringBuffer sb = new StringBuffer(); + + sb.append("Read index "); + sb.append(readIndex); + if (isSingleRow) { + sb.append(" single row"); + } else { + sb.append(" capped count "); + sb.append(cappedCount); + } + + if (readIndex > 0) { + sb.append(" byteSegmentRef is byte[] of length "); + sb.append(byteSegmentRef.getBytes().length); + sb.append(" at offset "); + sb.append(byteSegmentRef.getOffset()); + sb.append(" for length "); + sb.append(byteSegmentRef.getLength()); + if (!isSingleRow) { + sb.append(" (isNextEof "); + sb.append(isNextEof); + sb.append(" isNextLast "); + sb.append(isNextLast); + sb.append(" nextAbsoluteValueOffset "); + sb.append(nextAbsoluteValueOffset); + sb.append(" isNextValueLengthSmall "); + sb.append(isNextValueLengthSmall); + sb.append(" nextSmallValueLength "); + sb.append(nextSmallValueLength); + sb.append(")"); + } + } + + return sb.toString(); } @Override @@ -193,9 +230,7 @@ public class VectorMapJoinFastValueStore { } // Position to beginning. 
- haveReadCurrent = false; readIndex = 0; - isEof = false; return internalRead(); } @@ -363,18 +398,9 @@ public class VectorMapJoinFastValueStore { } @Override - public boolean isEof() { - if (!hasRows) { - return true; - } - return isEof; - } - - @Override public void forget() { } - @Override public String toString() { StringBuilder sb = new StringBuilder(); http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java index fa6dedb..a5dfba8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMapResult.java @@ -57,7 +57,7 @@ public abstract class VectorMapJoinHashMapResult extends VectorMapJoinHashTableR public abstract ByteSegmentRef next(); /** - * @return Whether reading is at the end. + * Get detailed HashMap result position information to help diagnose exceptions. */ - public abstract boolean isEof(); + public abstract String getDetailedHashMapResultPositionString(); } http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java index e56c821..0dc8aee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java @@ -88,11 +88,6 @@ public class VectorMapJoinOptimizedHashMap } @Override - public boolean isEof() { - return bytesBytesMultiHashMapResult.isEof(); - } - - @Override public void forget() { bytesBytesMultiHashMapResult.forget(); super.forget(); @@ -105,6 +100,11 @@ public class VectorMapJoinOptimizedHashMap sb.append("isSingleRow " + (joinResult() == JoinUtil.JoinResult.MATCH ? 
isSingleRow() : "") + ")"); return sb.toString(); } + + @Override + public String getDetailedHashMapResultPositionString() { + return "(Not supported yet)"; + } } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java index aed9214..c1d7c72 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java @@ -133,8 +133,6 @@ public class TestBytesBytesMultiHashMap { hs.add(ref.copy()); ref = hashMapResult.next(); } - } else { - assertTrue(hashMapResult.isEof()); } assertEquals(state, count); assertEquals(values.length, count); http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index c55d951..9c4a973 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.Random; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import junit.framework.TestCase; @@ -51,7 +50,7 @@ public class TestVectorRowObject extends TestCase { String[] emptyScratchTypeNames = new String[0]; - RandomRowObjectSource source = new RandomRowObjectSource(); + VectorRandomRowSource source = new VectorRandomRowSource(); source.init(r); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index da69ee3..c6704f9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -48,7 +48,6 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; -import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; @@ -85,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase { } void deserializeAndVerify(Output output, DeserializeRead deserializeRead, - RandomRowObjectSource source, Object[] expectedRow) + VectorRandomRowSource source, Object[] expectedRow) throws HiveException, IOException { 
deserializeRead.set(output.getData(), 0, output.getLength()); PrimitiveCategory[] primitiveCategories = source.primitiveCategories(); @@ -281,12 +280,11 @@ public class TestVectorSerDeRow extends TestCase { } deserializeRead.extraFieldsCheck(); TestCase.assertTrue(!deserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!deserializeRead.readBeyondBufferRangeWarned()); TestCase.assertTrue(!deserializeRead.bufferRangeHasExtraDataWarned()); } void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, - DeserializeRead deserializeRead, RandomRowObjectSource source, Object[][] randomRows, + DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows, int firstRandomRowIndex) throws HiveException, IOException { Output output = new Output(); @@ -311,7 +309,7 @@ public class TestVectorSerDeRow extends TestCase { String[] emptyScratchTypeNames = new String[0]; - RandomRowObjectSource source = new RandomRowObjectSource(); + VectorRandomRowSource source = new VectorRandomRowSource(); source.init(r); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); @@ -389,7 +387,7 @@ public class TestVectorSerDeRow extends TestCase { } } - private Output serializeRow(Object[] row, RandomRowObjectSource source, SerializeWrite serializeWrite) throws HiveException, IOException { + private Output serializeRow(Object[] row, VectorRandomRowSource source, SerializeWrite serializeWrite) throws HiveException, IOException { Output output = new Output(); serializeWrite.set(output); PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); @@ -542,7 +540,7 @@ public class TestVectorSerDeRow extends TestCase { String[] emptyScratchTypeNames = new String[0]; - RandomRowObjectSource source = new RandomRowObjectSource(); + VectorRandomRowSource source = new VectorRandomRowSource(); source.init(r); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java new file mode 100644 index 0000000..349c76a --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -0,0 +1,458 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.RandomTypeUtil; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hive.common.util.DateUtils; + +/** + * Generate object inspector and random row object[]. 
+ */ +public class VectorRandomRowSource { + + private Random r; + + private int columnCount; + + private List typeNames; + + private PrimitiveCategory[] primitiveCategories; + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private List primitiveObjectInspectorList; + + private StructObjectInspector rowStructObjectInspector; + + public List typeNames() { + return typeNames; + } + + public PrimitiveCategory[] primitiveCategories() { + return primitiveCategories; + } + + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + public StructObjectInspector rowStructObjectInspector() { + return rowStructObjectInspector; + } + + public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) { + ArrayList partialPrimitiveObjectInspectorList = + new ArrayList(partialFieldCount); + List columnNames = new ArrayList(partialFieldCount); + for (int i = 0; i < partialFieldCount; i++) { + columnNames.add(String.format("partial%d", i)); + partialPrimitiveObjectInspectorList.add( + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + primitiveTypeInfos[i])); + } + + return ObjectInspectorFactory.getStandardStructObjectInspector( + columnNames, primitiveObjectInspectorList); + } + + public void init(Random r) { + this.r = r; + chooseSchema(); + } + + /* + * For now, exclude CHAR until we determine why there is a difference (blank padding) + * serializing with LazyBinarySerializeWrite and the regular SerDe... + */ + private static String[] possibleHiveTypeNames = { + "boolean", + "tinyint", + "smallint", + "int", + "bigint", + "date", + "float", + "double", + "string", +// "char", + "varchar", + "binary", + "date", + "timestamp", + "interval_year_month", + "interval_day_time", + "decimal" + }; + + private void chooseSchema() { + HashSet hashSet = null; + boolean allTypes; + boolean onlyOne = (r.nextInt(100) == 7); + if (onlyOne) { + columnCount = 1; + allTypes = false; + } else { + allTypes = r.nextBoolean(); + if (allTypes) { + // One of each type. 
+ columnCount = possibleHiveTypeNames.length; + hashSet = new HashSet(); + } else { + columnCount = 1 + r.nextInt(20); + } + } + typeNames = new ArrayList(columnCount); + primitiveCategories = new PrimitiveCategory[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + primitiveObjectInspectorList = new ArrayList(columnCount); + List columnNames = new ArrayList(columnCount); + for (int c = 0; c < columnCount; c++) { + columnNames.add(String.format("col%d", c)); + String typeName; + + if (onlyOne) { + typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)]; + } else { + int typeNum; + if (allTypes) { + while (true) { + typeNum = r.nextInt(possibleHiveTypeNames.length); + Integer typeNumInteger = new Integer(typeNum); + if (!hashSet.contains(typeNumInteger)) { + hashSet.add(typeNumInteger); + break; + } + } + } else { + typeNum = r.nextInt(possibleHiveTypeNames.length); + } + typeName = possibleHiveTypeNames[typeNum]; + } + if (typeName.equals("char")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("char(%d)", maxLength); + } else if (typeName.equals("varchar")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("varchar(%d)", maxLength); + } else if (typeName.equals("decimal")) { + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + } + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + primitiveTypeInfos[c] = primitiveTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[c] = primitiveCategory; + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); + typeNames.add(typeName); + } + rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + } + + public Object[][] randomRows(int n) { + Object[][] result = new Object[n][]; + for (int i = 0; i < n; i++) { + result[i] = randomRow(); + } + return result; + } + + public Object[] randomRow() { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + Object object = randomObject(c); + if (object == null) { + throw new Error("Unexpected null for column " + c); + } + row[c] = getWritableObject(c, object); + if (row[c] == null) { + throw new Error("Unexpected null for writable for column " + c); + } + } + return row; + } + + public Object[] randomRow(int columnCount) { + return randomRow(columnCount, r, primitiveObjectInspectorList, primitiveCategories, + primitiveTypeInfos); + } + + public static Object[] randomRow(int columnCount, Random r, + List primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories, + PrimitiveTypeInfo[] primitiveTypeInfos) { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + Object object = randomObject(c, r, primitiveCategories, primitiveTypeInfos); + if (object == null) { + throw new Error("Unexpected null for column " + c); + } + row[c] = getWritableObject(c, object, primitiveObjectInspectorList, + primitiveCategories, primitiveTypeInfos); + if (row[c] == null) { + throw new Error("Unexpected null for writable for column " + c); + } + } + return row; + } + + public static void sort(Object[][] rows, ObjectInspector oi) { + for (int i = 0; i < rows.length; i++) { + for (int j = i + 1; j < rows.length; j++) { + if (ObjectInspectorUtils.compare(rows[i], oi, 
rows[j], oi) > 0) { + Object[] t = rows[i]; + rows[i] = rows[j]; + rows[j] = t; + } + } + } + } + + public void sort(Object[][] rows) { + VectorRandomRowSource.sort(rows, rowStructObjectInspector); + } + + public Object getWritableObject(int column, Object object) { + return getWritableObject(column, object, primitiveObjectInspectorList, + primitiveCategories, primitiveTypeInfos); + } + + public static Object getWritableObject(int column, Object object, + List primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories, + PrimitiveTypeInfo[] primitiveTypeInfos) { + ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object); + case BYTE: + return ((WritableByteObjectInspector) objectInspector).create((byte) object); + case SHORT: + return ((WritableShortObjectInspector) objectInspector).create((short) object); + case INT: + return ((WritableIntObjectInspector) objectInspector).create((int) object); + case LONG: + return ((WritableLongObjectInspector) objectInspector).create((long) object); + case DATE: + return ((WritableDateObjectInspector) objectInspector).create((Date) object); + case FLOAT: + return ((WritableFloatObjectInspector) objectInspector).create((float) object); + case DOUBLE: + return ((WritableDoubleObjectInspector) objectInspector).create((double) object); + case STRING: + return ((WritableStringObjectInspector) objectInspector).create((String) object); + case CHAR: + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + } + case VARCHAR: + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + } + case BINARY: + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + case TIMESTAMP: + return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); + case INTERVAL_YEAR_MONTH: + return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); + case INTERVAL_DAY_TIME: + return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); + case DECIMAL: + { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public Object randomObject(int column) { + return randomObject(column, r, primitiveCategories, primitiveTypeInfos); + } + + public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories, + PrimitiveTypeInfo[] primitiveTypeInfos) { + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return Boolean.valueOf(r.nextInt(1) == 1); + case BYTE: + return 
Byte.valueOf((byte) r.nextInt()); + case SHORT: + return Short.valueOf((short) r.nextInt()); + case INT: + return Integer.valueOf(r.nextInt()); + case LONG: + return Long.valueOf(r.nextLong()); + case DATE: + return RandomTypeUtil.getRandDate(r); + case FLOAT: + return Float.valueOf(r.nextFloat() * 10 - 5); + case DOUBLE: + return Double.valueOf(r.nextDouble() * 10 - 5); + case STRING: + return RandomTypeUtil.getRandString(r); + case CHAR: + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + case VARCHAR: + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + case BINARY: + return getRandBinary(r, 1 + r.nextInt(100)); + case TIMESTAMP: + return RandomTypeUtil.getRandTimestamp(r); + case INTERVAL_YEAR_MONTH: + return getRandIntervalYearMonth(r); + case INTERVAL_DAY_TIME: + return getRandIntervalDayTime(r); + case DECIMAL: + return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { + int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { + int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { + while (true) { + StringBuilder sb = new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + if (bd.scale() > bd.precision()) { + // Sometimes weird decimals are produced? + continue; + } + + return bd; + } + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? 
"" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + return intervalDayTimeVal; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java index 3a23584..28a4dc6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.List; +import java.util.Properties; import java.util.Random; import java.util.TreeMap; @@ -43,12 +44,14 @@ import static org.junit.Assert.*; public class CheckFastHashTable { - public static boolean findMatch(byte[] valueBytes, List actualValues, int actualCount, boolean[] taken) { + public static boolean findMatch(int valueIndex, byte[] valueBytes, List actualValues, + int actualCount, boolean[] actualTaken, int[] actualToValueMap) { for (int i = 0; i < actualCount; i++) { - if (!taken[i]) { + if (!actualTaken[i]) { byte[] actualBytes = actualValues.get(i); if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) { - taken[i] = true; + actualToValueMap[i] = valueIndex; + actualTaken[i] = true; return true; } } @@ -56,7 +59,7 @@ public class CheckFastHashTable { return false; } - public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult, + public static int[] verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult, List values) { int valueCount = values.size(); @@ -87,15 +90,16 @@ public class CheckFastHashTable { TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount); } - boolean[] taken = new boolean[actualCount]; + boolean[] actualTaken = new boolean[actualCount]; + int[] actualToValueMap = new int[actualCount]; for (int i = 0; i < actualCount; i++) { byte[] valueBytes = values.get(i); - if (!findMatch(valueBytes, actualValues, actualCount, taken)) { + if (!findMatch(i, valueBytes, actualValues, actualCount, actualTaken, actualToValueMap)) { List availableLengths = new ArrayList(); for (int a = 0; a < actualCount; a++) { - if (!taken[a]) { + if (!actualTaken[a]) { availableLengths.add(actualValues.get(a).length); } } @@ -103,6 +107,7 @@ public class CheckFastHashTable { ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)"); } } + return actualToValueMap; } /* http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java new file mode 100644 index 0000000..0bcfb56 --- /dev/null +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java @@ -0,0 +1,387 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.assertTrue; + +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.TreeMap; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Preconditions; + +public class CheckFastRowHashMap extends CheckFastHashTable { + + public static void verifyHashMapRows(List rows, int[] actualToValueMap, + VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos) throws IOException { + + final int count = rows.size(); + final int columnCount = typeInfos.length; + + WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); + + for (int a = 0; a < count; a++) { + + int valueIndex = actualToValueMap[a]; + + Object[] row = rows.get(valueIndex); + + byte[] bytes = ref.getBytes(); + int offset = (int) ref.getOffset(); + int length = ref.getLength(); + + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(typeInfos); + + lazyBinaryDeserializeRead.set(bytes, offset, length); + + for (int index = 0; index < columnCount; index++) { + Writable writable = (Writable) row[index]; + VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable); + } + lazyBinaryDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + + TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); + + ref = hashMapResult.next(); + if (a == count - 1) { + TestCase.assertTrue (ref == null); + } else { + TestCase.assertTrue (ref != null); + } + } + } + + private static String debugDetailedReadPositionString; + + 
private static String debugDetailedHashMapResultPositionString; + + private static String debugExceptionMessage; + private static StackTraceElement[] debugStackTrace; + + public static void verifyHashMapRowsMore(List rows, int[] actualToValueMap, + VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos, + int clipIndex, boolean useExactBytes) throws IOException { + + final int count = rows.size(); + final int columnCount = typeInfos.length; + + WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); + + for (int a = 0; a < count; a++) { + + int valueIndex = actualToValueMap[a]; + + Object[] row = rows.get(valueIndex); + + byte[] bytes = ref.getBytes(); + int offset = (int) ref.getOffset(); + int length = ref.getLength(); + if (a == clipIndex) { + length--; + } + + if (useExactBytes) { + // Use exact byte array which might generate array out of bounds... + bytes = Arrays.copyOfRange(bytes, offset, offset + length); + offset = 0; + } + + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(typeInfos); + + lazyBinaryDeserializeRead.set(bytes, offset, length); + + boolean thrown = false; + Exception saveException = null; + boolean notExpected = false; + int index = 0; + try { + for (index = 0; index < columnCount; index++) { + Writable writable = (Writable) row[index]; + VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable); + } + } catch (Exception e) { + thrown = true; + saveException = e; + debugDetailedReadPositionString = lazyBinaryDeserializeRead.getDetailedReadPositionString(); + + debugDetailedHashMapResultPositionString = hashMapResult.getDetailedHashMapResultPositionString(); + + debugExceptionMessage = saveException.getMessage(); + debugStackTrace = saveException.getStackTrace(); + } + if (a == clipIndex) { + if (!thrown) { + TestCase.fail("Expecting an exception to be thrown for the clipped case..."); + } else { + TestCase.assertTrue(saveException != null); + if (saveException instanceof EOFException) { + // This is the one we are expecting. 
+ } else if (saveException instanceof ArrayIndexOutOfBoundsException) { + notExpected = true; + } else { + TestCase.fail("Expecting an EOFException to be thrown for the clipped case..."); + } + } + } else { + if (thrown) { + TestCase.fail("Not expecting an exception to be thrown for the non-clipped case..."); + } + lazyBinaryDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + + TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + ref = hashMapResult.next(); + if (a == count - 1) { + TestCase.assertTrue (ref == null); + } else { + TestCase.assertTrue (ref != null); + } + } + } + + /* + * Element for Key: row and byte[] x Hash Table: HashMap + */ + public static class FastRowHashMapElement { + private byte[] key; + private Object[] keyRow; + private List values; + private List valueRows; + + public FastRowHashMapElement(byte[] key, Object[] keyRow, byte[] firstValue, + Object[] valueRow) { + this.key = key; + this.keyRow = keyRow; + values = new ArrayList(); + values.add(firstValue); + valueRows = new ArrayList(); + valueRows.add(valueRow); + } + + public byte[] getKey() { + return key; + } + + public Object[] getKeyRow() { + return keyRow; + } + + public int getCount() { + return values.size(); + } + + public List getValues() { + return values; + } + + public List getValueRows() { + return valueRows; + } + + public void add(byte[] value, Object[] valueRow) { + values.add(value); + valueRows.add(valueRow); + } + } + + /* + * Verify table for Key: row and byte[] x Hash Table: HashMap + */ + public static class VerifyFastRowHashMap { + + private int count; + + private FastRowHashMapElement[] array; + + private TreeMap keyValueMap; + + public VerifyFastRowHashMap() { + count = 0; + array = new FastRowHashMapElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. + keyValueMap = new TreeMap(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key, Object[] keyRow, byte[] value, Object[] valueRow) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + int index = keyValueMap.get(keyBytesWritable); + array[index].add(value, valueRow); + } else { + if (count >= array.length) { + // Grow. 
+ FastRowHashMapElement[] newArray = new FastRowHashMapElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastRowHashMapElement(key, keyRow, value, valueRow); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Object[] valueRow, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].add(value, valueRow); + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public List getValues(int index) { + return array[index].getValues(); + } + + public void verify(VectorMapJoinFastHashTable map, + HashTableKeyType hashTableKeyType, + PrimitiveTypeInfo[] valuePrimitiveTypeInfos, boolean doClipping, + boolean useExactBytes, Random random) throws IOException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastRowHashMapElement element = array[index]; + + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = null; + JoinUtil.JoinResult joinResult = JoinUtil.JoinResult.NOMATCH; + switch (hashTableKeyType) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + { + Object[] keyRow = element.getKeyRow(); + Object keyObject = keyRow[0]; + VectorMapJoinFastLongHashMap longHashMap = (VectorMapJoinFastLongHashMap) map; + hashMapResult = longHashMap.createHashMapResult(); + long longKey; + switch (hashTableKeyType) { + case BOOLEAN: + longKey = ((BooleanWritable) keyObject).get() ? 1 : 0; + break; + case BYTE: + longKey = ((ByteWritable) keyObject).get(); + break; + case SHORT: + longKey = ((ShortWritable) keyObject).get(); + break; + case INT: + longKey = ((IntWritable) keyObject).get(); + break; + case LONG: + longKey = ((LongWritable) keyObject).get(); + break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); + } + joinResult = longHashMap.lookup(longKey, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + break; + case STRING: + { + Object[] keyRow = element.getKeyRow(); + Object keyObject = keyRow[0]; + VectorMapJoinFastStringHashMap stringHashMap = (VectorMapJoinFastStringHashMap) map; + hashMapResult = stringHashMap.createHashMapResult(); + Text text = (Text) keyObject; + byte[] bytes = text.getBytes(); + int length = text.getLength(); + joinResult = stringHashMap.lookup(bytes, 0, length, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + break; + case MULTI_KEY: + { + byte[] keyBytes = element.getKey(); + VectorMapJoinFastMultiKeyHashMap stringHashMap = (VectorMapJoinFastMultiKeyHashMap) map; + hashMapResult = stringHashMap.createHashMapResult(); + joinResult = stringHashMap.lookup(keyBytes, 0, keyBytes.length, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); + } + + int[] actualToValueMap = verifyHashMapValues(hashMapResult, values); + + List rows = element.getValueRows(); + if (!doClipping && !useExactBytes) { + verifyHashMapRows(rows, actualToValueMap, hashMapResult, valuePrimitiveTypeInfos); + } else { + int clipIndex = random.nextInt(rows.size()); + verifyHashMapRowsMore(rows, actualToValueMap, hashMapResult, 
valuePrimitiveTypeInfos, + clipIndex, useExactBytes); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/69ab3b27/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java index bbfa65f..8525e99 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java @@ -269,4 +269,19 @@ public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable { int keyCount = 1000; addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); } + + @Test + public void testReallyBig() throws Exception { + random = new Random(42662); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 1000000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } }
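----------------------------------------------------------------------

For readers following the change: every call site that drives a Fast DeserializeRead class above (ReduceRecordSource, VectorMapOperator, VectorMapJoinGenerateResultOperator, VectorMapJoinFastLongHashTable, VectorMapJoinFastStringCommon) now wraps the deserialize step and rethrows with the reader's detailed position, replacing the old throwMoreDetailedException() EOF handling removed from VectorDeserializeRow. A minimal sketch of that idiom follows; VectorDeserializeRow, VectorizedRowBatch, and HiveException are the real Hive classes touched in the diff, but the helper method itself is illustrative only, not part of the commit:

// Sketch of the error-reporting idiom HIVE-13874 applies at each
// deserialize call site: instead of letting a raw EOFException escape,
// rethrow as a HiveException carrying the detailed read position.
import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class DeserializeReadDetailSketch {

  // Deserialize one serialized row into the batch, appending the row at the
  // current batch size (as VectorMapOperator does in the diff above). On any
  // failure, attach getDetailedReadPositionString() so the exception reports
  // the field index, field types, and buffer offsets at the point of failure.
  static void deserializeRowWithDetail(VectorDeserializeRow deserializeRow,
      VectorizedRowBatch batch, byte[] bytes, int offset, int length)
      throws HiveException {
    deserializeRow.setBytes(bytes, offset, length);
    try {
      deserializeRow.deserialize(batch, batch.size++);
    } catch (Exception e) {
      throw new HiveException(
          "\nDeserializeRead detail: "
              + deserializeRow.getDetailedReadPositionString(),
          e);
    }
  }
}

The same pattern appears around readCheckNull() in the fast long and string hash tables, where the diff instead folds the detail string and e.toString() into the message of a single-argument HiveException.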