jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [6/6] jena git commit: Set the threshold correctly.
Date Tue, 02 Jun 2015 10:41:09 GMT
Set the threshold correctly.

Use a data bag, not a data net, as we don't use the early notification
of uniqueness anymore.  See JENA-949.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/bcba645a
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/bcba645a
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/bcba645a

Branch: refs/heads/master
Commit: bcba645a561866545705c0e73beae143a15fb3d9
Parents: fecd978
Author: Andy Seaborne <andy@apache.org>
Authored: Tue Jun 2 11:39:05 2015 +0100
Committer: Andy Seaborne <andy@apache.org>
Committed: Tue Jun 2 11:39:05 2015 +0100

----------------------------------------------------------------------
 .../engine/iterator/QueryIterDistinct.java      | 45 ++++++++++----------
 .../iterator/AbstractTestDistinctReduced.java   |  1 -
 2 files changed, 23 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/bcba645a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
index a88cb39..d373ce6 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterDistinct.java
@@ -20,13 +20,12 @@ package org.apache.jena.sparql.engine.iterator ;
 
 import java.util.* ;
 
-import org.apache.jena.atlas.data.BagFactory ;
-import org.apache.jena.atlas.data.DistinctDataNet ;
-import org.apache.jena.atlas.data.ThresholdPolicy ;
-import org.apache.jena.atlas.data.ThresholdPolicyFactory ;
+import org.apache.jena.atlas.data.* ;
 import org.apache.jena.atlas.lib.InternalErrorException ;
+import org.apache.jena.query.ARQ ;
 import org.apache.jena.query.SortCondition ;
 import org.apache.jena.riot.system.SerializationFactoryFinder ;
+import org.apache.jena.sparql.ARQException ;
 import org.apache.jena.sparql.engine.ExecutionContext ;
 import org.apache.jena.sparql.engine.QueryIterator ;
 import org.apache.jena.sparql.engine.binding.Binding ;
@@ -39,23 +38,23 @@ import org.apache.jena.sparql.engine.binding.BindingProjectNamed ;
  * {@link DistinctDataNet}, then yield   
  * not  return any results until the input iterator has been exhausted.
  * 
- * @see DistinctDataNet
+ * @see DistinctDataBag
  */
 public class QueryIterDistinct extends QueryIter1
 {
-    private int Threshold1 = 3 ;
-    private DistinctDataNet<Binding> db = null ;
+    private long memThreshold = Long.MAX_VALUE ;    // Default "off" value.
+    private DistinctDataBag<Binding> db = null ;
     private Iterator<Binding> iterator = null ;
     private Set<Binding> seen = new HashSet<>() ;
     private Binding slot = null ;
 
-    public QueryIterDistinct(QueryIterator qIter, ExecutionContext context) {
-        super(qIter, context) ;
-    }
-    
-    public QueryIterDistinct(QueryIterator qIter, ExecutionContext context, int threshold1) {
-        super(qIter, context) ;
-        this.Threshold1 = threshold1 ;
+    public QueryIterDistinct(QueryIterator qIter, ExecutionContext execCxt) {
+        super(qIter, execCxt) ;
+        if ( execCxt != null ) {
+            memThreshold = execCxt.getContext().getLong(ARQ.spillToDiskThreshold, memThreshold) ;
+            if ( memThreshold < 0 )
+                throw new ARQException("BAd spillToDiskThreshold: "+memThreshold) ;
+        }
     }
     
     @Override
@@ -67,7 +66,7 @@ public class QueryIterDistinct extends QueryIter1
             return iterator.hasNext() ;
        
         // At this point, we are currently in the initial pre-threshold mode.
-        if ( seen.size() >= Threshold1 ) {
+        if ( seen.size() < memThreshold ) {
             Binding b = getInputNextUnseen() ;
             if ( b == null )
                 return false ;
@@ -76,18 +75,19 @@ public class QueryIterDistinct extends QueryIter1
             return true ;
         }
         
-        // Hit the threashold.
+        // Hit the threshold.
         loadDataBag() ;
-        // Switch to iterating from the databad.  
+        // Switch to iterating from the data bag.  
         iterator = db.iterator() ;
         // Leave slot null.
         return iterator.hasNext() ;
     }
     
+    /** Load the data bag from the input, filtering out elements already seen in memory. */
 
     private void loadDataBag() {
         ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(super.getExecContext().getContext()) ;
         Comparator<Binding> comparator = new BindingComparator(new ArrayList<SortCondition>(), super.getExecContext()) ;
-        this.db = BagFactory.newDistinctNet(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator) ;
+        this.db = BagFactory.newDistinctBag(policy, SerializationFactoryFinder.bindingSerializationFactory(), comparator) ;
         for(;;) {
             Binding b = getInputNextUnseen() ;
             if ( b == null )
@@ -96,9 +96,10 @@ public class QueryIterDistinct extends QueryIter1
         }
     }
     
-    // Return the next binding from the input filtered by seen.
-    // This does not update seen.
-    // Returns null on end of input.
+    /** Return the next binding from the input filtered by seen.
+     * This does not update seen.
+     * Returns null on end of input.
+    */
     private Binding getInputNextUnseen() {
         while( getInput().hasNext() ) {
             Binding b = getInputNext() ;
@@ -109,7 +110,7 @@ public class QueryIterDistinct extends QueryIter1
         return null ;
     }
 
-    // Return the next wrapped binding from the input.
+    /** Return the binding from the input, hiding any variables to be ignored. */
     private Binding getInputNext() {
         Binding b = getInput().next() ;
         // Hide unnamed and internal variables.

http://git-wip-us.apache.org/repos/asf/jena/blob/bcba645a/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
index d9b5ec3..ac37b63 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/iterator/AbstractTestDistinctReduced.java
@@ -77,7 +77,6 @@ public abstract class AbstractTestDistinctReduced extends BaseTest {
         distinct(data, results) ;
     }
 
-
     private void distinct(List<String> data, List<String> results) {
         // Distinct Iterators are not required to preserve order.
         List<Binding> input = build(data) ;


Mime
View raw message