jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sal...@apache.org
Subject svn commit: r1299301 - in /incubator/jena/Jena2/ARQ/trunk/src: main/java/org/openjena/atlas/data/SortedDataBag.java test/java/org/openjena/atlas/data/TestSortedDataBag.java
Date Sat, 10 Mar 2012 23:14:23 GMT
Author: sallen
Date: Sat Mar 10 23:14:22 2012
New Revision: 1299301

URL: http://svn.apache.org/viewvc?rev=1299301&view=rev
Log:
JENA-157 (DataBag needs to pre-merge spill files) - Added unit test for premerge.

Modified:
    incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/SortedDataBag.java
    incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/atlas/data/TestSortedDataBag.java

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/SortedDataBag.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/SortedDataBag.java?rev=1299301&r1=1299300&r2=1299301&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/SortedDataBag.java	(original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/atlas/data/SortedDataBag.java	Sat Mar 10 23:14:22 2012
@@ -62,8 +62,11 @@ import org.openjena.atlas.lib.Sink ;
  */
 public class SortedDataBag<E> extends AbstractDataBag<E>
 {
-    private static final int MAX_SPILL_FILES = 100 ; // this is the maximum number of files to merge at the same time
-
+    /**
+     * The maximum number of files to merge at the same time.  Without this, you can run out of file handles and other bad things.
+     */
+    protected static int MAX_SPILL_FILES = 100 ;
+    
     protected final ThresholdPolicy<E> policy;
     protected final SerializationFactory<E> serializationFactory;
     protected final Comparator<? super E> comparator;
@@ -250,25 +253,36 @@ public class SortedDataBag<E> extends Ab
             }
         }
     }
-    
-    private void preMerge() {
-        if (getSpillFiles() == null || getSpillFiles().size() <= MAX_SPILL_FILES) { return; }
 
-        try {
-            while ( getSpillFiles().size() > MAX_SPILL_FILES ) {
+    private void preMerge()
+    {
+        if (getSpillFiles() == null || getSpillFiles().size() <= MAX_SPILL_FILES)
+        {
+            return ;
+        }
+
+        try
+        {
+            while (getSpillFiles().size() > MAX_SPILL_FILES)
+            {
                 Sink<E> sink = serializationFactory.createSerializer(getSpillStream()) ;
                 Iterator<E> ssi = iterator(MAX_SPILL_FILES) ;
-                try {
-                    while ( ssi.hasNext() ) {
-                        sink.send( ssi.next() );
+                try
+                {
+                    while (ssi.hasNext())
+                    {
+                        sink.send(ssi.next()) ;
                     }
-                } finally {
+                }
+                finally
+                {
                     Iter.close(ssi) ;
                     sink.close() ;
                 }
-                
+
                 List<File> toRemove = new ArrayList<File>(MAX_SPILL_FILES) ;
-                for ( int i = 0; i < MAX_SPILL_FILES; i++ ) {
+                for (int i = 0; i < MAX_SPILL_FILES; i++)
+                {
                     File file = getSpillFiles().get(i) ;
                     file.delete() ;
                     toRemove.add(file) ;
@@ -277,8 +291,10 @@ public class SortedDataBag<E> extends Ab
                 getSpillFiles().removeAll(toRemove) ;
 
                 memory = new ArrayList<E>() ;
-            }            
-        } catch (IOException e) {
+            }
+        }
+        catch (IOException e)
+        {
             throw new AtlasException(e) ;
         }
     }

Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/atlas/data/TestSortedDataBag.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/atlas/data/TestSortedDataBag.java?rev=1299301&r1=1299300&r2=1299301&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/atlas/data/TestSortedDataBag.java	(original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/atlas/data/TestSortedDataBag.java	Sat Mar 10 23:14:22 2012
@@ -27,7 +27,6 @@ import java.util.Random ;
 
 import junit.framework.TestCase ;
 
-import org.junit.Before ;
 import org.junit.Test ;
 import org.openjena.atlas.iterator.Iter ;
 import org.openjena.riot.SerializationFactoryFinder ;
@@ -49,34 +48,26 @@ public class TestSortedDataBag extends T
 {
     private static final String LETTERS = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM";
     private Random random;
-    private List<Binding> unsorted;
-
-    @Before @Override public void setUp() 
+    
+    @Test public void testSorting() 
     {
-        random = new Random();
-        Var[] vars = new Var[]{
-            Var.alloc("1"), Var.alloc("2"), Var.alloc("3"),
-            Var.alloc("4"), Var.alloc("5"), Var.alloc("6"),
-            Var.alloc("7"), Var.alloc("8"), Var.alloc("9"), Var.alloc("0")
-        };
-        unsorted = new ArrayList<Binding>();
-        for(int i = 0; i < 500; i++){
-            unsorted.add(randomBinding(vars));
-        }
+        testSorting(500, 10);
     }
-
-    @Test public void testSorting() 
+    
+    private void testSorting(int numBindings, int threshold)
     {
+        List<Binding> unsorted = randomBindings(numBindings);
+        
         List<SortCondition> conditions = new ArrayList<SortCondition>(); 
         conditions.add(new SortCondition(new ExprVar("8"), Query.ORDER_ASCENDING));
         conditions.add(new SortCondition(new ExprVar("1"), Query.ORDER_ASCENDING));
         conditions.add(new SortCondition(new ExprVar("0"), Query.ORDER_DESCENDING));
         BindingComparator comparator = new BindingComparator(conditions);
-        
+
         List<Binding> sorted = new ArrayList<Binding>();
         
         SortedDataBag<Binding> db = new SortedDataBag<Binding>(
-                new ThresholdPolicyCount<Binding>(10),
+                new ThresholdPolicyCount<Binding>(threshold),
                 SerializationFactoryFinder.bindingSerializationFactory(),
                 comparator);
         try
@@ -98,8 +89,34 @@ public class TestSortedDataBag extends T
         assertEquals(unsorted, sorted);
     }
     
+    @Test public void testSortingWithPreMerge() 
+    {
+        // Save the original value...
+        int origMaxSpillFiles = SortedDataBag.MAX_SPILL_FILES;
+        try
+        {
+            // Vary the number of spill files and bindings so we get a variable number of premerge rounds
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(1, 1);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(2, 1);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(3, 1);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(4, 1);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(5, 1);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(1, 10);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(1000, 10);
+            SortedDataBag.MAX_SPILL_FILES = 100;  testSorting(1000, 10);
+            SortedDataBag.MAX_SPILL_FILES = 2;    testSorting(10, 10);
+            SortedDataBag.MAX_SPILL_FILES = 5;    testSorting(10, 10);
+        }
+        finally
+        {
+            SortedDataBag.MAX_SPILL_FILES = origMaxSpillFiles;
+        }
+    }
+    
     @Test public void testTemporaryFilesAreCleanedUpAfterCompletion()
     {
+        List<Binding> unsorted = randomBindings(500);
+        
         List<SortCondition> conditions = new ArrayList<SortCondition>(); 
         conditions.add(new SortCondition(new ExprVar("8"), Query.ORDER_ASCENDING));
         BindingComparator comparator = new BindingComparator(conditions);
@@ -149,6 +166,22 @@ public class TestSortedDataBag extends T
         assertEquals(0, count);
     }
     
+    private List<Binding> randomBindings(int numBindings) 
+    {
+        random = new Random();
+        Var[] vars = new Var[]{
+            Var.alloc("1"), Var.alloc("2"), Var.alloc("3"),
+            Var.alloc("4"), Var.alloc("5"), Var.alloc("6"),
+            Var.alloc("7"), Var.alloc("8"), Var.alloc("9"), Var.alloc("0")
+        };
+        List<Binding> toReturn = new ArrayList<Binding>();
+        for(int i = 0; i < numBindings; i++){
+            toReturn.add(randomBinding(vars));
+        }
+        
+        return toReturn;
+    }
+    
     private Binding randomBinding(Var[] vars)
     {
         BindingMap binding = BindingFactory.create();
@@ -165,12 +198,12 @@ public class TestSortedDataBag extends T
         return binding;
     }
 
-    public String randomURI() 
+    private String randomURI() 
     {
         return String.format("http://%s.example.com/%s", randomString(10), randomString(10));
     }
     
-    public String randomString(int length)
+    private String randomString(int length)
     {
         StringBuilder builder = new StringBuilder();
         for(int i = 0; i < length; i++){



Mime
View raw message