asterixdb-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Heri Ramampiaro (Code Review)" <do-not-re...@asterixdb.incubator.apache.org>
Subject Change in asterixdb[master]: ASTERIXDB-1255: Fix for potential issues related to object c...
Date Mon, 18 Jan 2016 13:19:16 GMT
Heri Ramampiaro has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/586

Change subject: ASTERIXDB-1255: Fix for potential issues related to object creation in Jaccard
Similarity evaluation
......................................................................

ASTERIXDB-1255: Fix for potential issues related to object creation in Jaccard Similarity
evaluation

This change fixes the above issue by avoiding the creation of hash function objects per tuple
evaluation (at runtime). Instead, it uses an object pool.

Change-Id: I08c550d35d864df08792369d38ff81012e3976af
---
M asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
M asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
M asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
3 files changed, 71 insertions(+), 20 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/86/586/1

diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
index 767a343..35d99da 100644
--- a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
+++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
@@ -21,6 +21,8 @@
 
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.om.util.container.IObjectFactory;
+import org.apache.hyracks.algebricks.common.utils.Triple;
 import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
 import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -29,8 +31,8 @@
 import org.apache.hyracks.data.std.primitive.DoublePointable;
 import org.apache.hyracks.data.std.primitive.FloatPointable;
 import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.primitive.UTF8StringLowercasePointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 
 public class ListItemBinaryComparatorFactory implements IBinaryComparatorFactory {
 
@@ -38,6 +40,15 @@
 
     public static final ListItemBinaryComparatorFactory INSTANCE = new ListItemBinaryComparatorFactory();
 
+    // To be used for object alloctor (object pool)
+    public static final IObjectFactory<IBinaryComparator, Triple<ATypeTag, ATypeTag,
Boolean>> ALLOCATOR = new
+            IObjectFactory<IBinaryComparator, Triple<ATypeTag, ATypeTag, Boolean>>()
{
+        @Override
+        public IBinaryComparator create(Triple<ATypeTag, ATypeTag, Boolean> arg) {
+            return (new ListItemBinaryComparatorFactory()).createBinaryComparator(arg.first,
arg.second, arg.third);
+        }
+    };
+
     private ListItemBinaryComparatorFactory() {
     }
 
diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
index 493833b..d879924 100644
--- a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
+++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
@@ -23,6 +23,8 @@
 
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.om.util.container.IObjectFactory;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
 import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -37,11 +39,17 @@
  * types with the same raw bytes.
  */
 public class ListItemBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
-
-    private static final long serialVersionUID = 1L;
-
     public static final ListItemBinaryHashFunctionFactory INSTANCE = new ListItemBinaryHashFunctionFactory();
 
+    // To be used for object alloctor (object pool)
+    public static final IObjectFactory<IBinaryHashFunction, Pair<ATypeTag, Boolean>>
ALLOCATOR = new IObjectFactory<IBinaryHashFunction, Pair<ATypeTag, Boolean>>()
{
+        @Override
+        public IBinaryHashFunction create(Pair<ATypeTag, Boolean> arg) {
+            return (new ListItemBinaryHashFunctionFactory()).createBinaryHashFunction(arg.first,
arg.second);
+        }
+    };
+    private static final long serialVersionUID = 1L;
+
     private ListItemBinaryHashFunctionFactory() {
     }
 
diff --git a/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
b/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
index 4481819..2d58f3d 100644
--- a/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
+++ b/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
@@ -30,9 +30,13 @@
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.om.util.container.IObjectPool;
+import org.apache.asterix.om.util.container.ListObjectPool;
 import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap;
 import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap.BinaryEntry;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.algebricks.common.utils.Triple;
 import org.apache.hyracks.algebricks.runtime.base.ICopyEvaluator;
 import org.apache.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
 import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
@@ -62,15 +66,20 @@
     protected final AsterixOrderedListIterator sndOrdListIter = new AsterixOrderedListIterator();
     protected final AsterixUnorderedListIterator fstUnordListIter = new AsterixUnorderedListIterator();
     protected final AsterixUnorderedListIterator sndUnordListIter = new AsterixUnorderedListIterator();
-
-    protected AbstractAsterixListIterator firstListIter;
-    protected AbstractAsterixListIterator secondListIter;
-
     protected final AMutableFloat aFloat = new AMutableFloat(0);
     @SuppressWarnings("unchecked")
     protected final ISerializerDeserializer<AFloat> floatSerde = AqlSerializerDeserializerProvider.INSTANCE
             .getSerializerDeserializer(BuiltinType.AFLOAT);
-
+    // Ignore case for strings. Defaults to true.
+    protected final boolean ignoreCase = true;
+    protected final Pair<ATypeTag, Boolean> itemArg = new Pair<>(ATypeTag.ANY,
false);
+    protected final Triple<ATypeTag, ATypeTag, Boolean> itemCompartorArg = new Triple<>(null,
null, false);
+    protected AbstractAsterixListIterator firstListIter;
+    protected AbstractAsterixListIterator secondListIter;
+    protected IObjectPool<IBinaryHashFunction, Pair<ATypeTag, Boolean>> binaryHashFunctionPool
=
+            new ListObjectPool<IBinaryHashFunction, Pair<ATypeTag, Boolean>>(ListItemBinaryHashFunctionFactory.ALLOCATOR);
+    protected IObjectPool<IBinaryComparator, Triple<ATypeTag, ATypeTag, Boolean>>
binaryComparatorPool =
+            new ListObjectPool<IBinaryComparator, Triple<ATypeTag, ATypeTag, Boolean>>(ListItemBinaryComparatorFactory.ALLOCATOR);
     protected ATypeTag firstTypeTag;
     protected ATypeTag secondTypeTag;
     protected int firstStart = -1;
@@ -78,13 +87,10 @@
     protected float jaccSim = 0.0f;
     protected ATypeTag firstItemTypeTag;
     protected ATypeTag secondItemTypeTag;
-
     protected BinaryHashMap hashMap;
     protected BinaryEntry keyEntry = new BinaryEntry();
     protected BinaryEntry valEntry = new BinaryEntry();
-
-    // Ignore case for strings. Defaults to true.
-    protected final boolean ignoreCase = true;
+    protected int hashTableSize = TABLE_SIZE;
 
     public SimilarityJaccardEvaluator(ICopyEvaluatorFactory[] args, IDataOutputProvider output)
             throws AlgebricksException {
@@ -109,6 +115,7 @@
         }
         try {
             writeResult(jaccSim);
+            resetPool();
         } catch (IOException e) {
             throw new AlgebricksException(e);
         }
@@ -143,7 +150,12 @@
         if (firstListIter.size() == 0 || secondListIter.size() == 0) {
             return false;
         }
+
+        // Set the size of the table dynamically
+        hashTableSize = (firstListIter.size() >= secondListIter.size()) ? firstListIter.size()
: secondListIter.size();
+
         // TODO: Check item types are compatible.
+
         return true;
     }
 
@@ -231,13 +243,10 @@
             return;
         }
 
-        IBinaryHashFunction putHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction(
-                buildItemTypeTag, ignoreCase);
-        IBinaryHashFunction getHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction(
-                probeItemTypeTag, ignoreCase);
-        IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE.createBinaryComparator(buildItemTypeTag,
-                probeItemTypeTag, ignoreCase);
-        hashMap = new BinaryHashMap(TABLE_SIZE, TABLE_FRAME_SIZE, putHashFunc, getHashFunc,
cmp);
+        IBinaryHashFunction putHashFunc = getBinaryHashFunction(buildItemTypeTag, ignoreCase);
+        IBinaryHashFunction getHashFunc = getBinaryHashFunction(probeItemTypeTag, ignoreCase);
+        IBinaryComparator cmp = getBinaryComparatorFunction(buildItemTypeTag, probeItemTypeTag,
ignoreCase);
+        hashMap = new BinaryHashMap(hashTableSize, TABLE_FRAME_SIZE, putHashFunc, getHashFunc,
cmp);
     }
 
     protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws AlgebricksException
{
@@ -283,4 +292,27 @@
         aFloat.setValue(jacc);
         floatSerde.serialize(aFloat, out);
     }
+
+    // Make sure to release the object resource.
+    protected void resetPool() {
+        binaryHashFunctionPool.reset();
+        binaryComparatorPool.reset();
+    }
+
+    // Allocate binary function for the itemlist
+    protected IBinaryHashFunction getBinaryHashFunction(ATypeTag typeTag, boolean ignoreCase)
{
+        itemArg.first = typeTag;
+        itemArg.second = ignoreCase;
+        return binaryHashFunctionPool.allocate(itemArg);
+    }
+
+    // Allocate binary function for the itemlist
+    protected IBinaryComparator getBinaryComparatorFunction(ATypeTag firstItemTypeTag, ATypeTag
secondItemTypeTag,
+            boolean ignoreCase) {
+        itemCompartorArg.first = firstItemTypeTag;
+        itemCompartorArg.second = secondItemTypeTag;
+        itemCompartorArg.third = ignoreCase;
+        return binaryComparatorPool.allocate(itemCompartorArg);
+    }
+
 }
\ No newline at end of file

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/586
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I08c550d35d864df08792369d38ff81012e3976af
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Heri Ramampiaro <heriram@gmail.com>

Mime
View raw message