manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1546549 - in /manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system: PriorityCalculator.java ReprioritizationTracker.java
Date Fri, 29 Nov 2013 12:26:54 GMT
Author: kwright
Date: Fri Nov 29 12:26:53 2013
New Revision: 1546549

URL: http://svn.apache.org/r1546549
Log:
Add infrastructure support for preloading bin values

Modified:
    manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
    manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ReprioritizationTracker.java

Modified: manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java?rev=1546549&r1=1546548&r2=1546549&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
(original)
+++ manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PriorityCalculator.java
Fri Nov 29 12:26:53 2013
@@ -39,25 +39,25 @@ public class PriorityCalculator implemen
   protected final String[] binNames;
   protected final ReprioritizationTracker rt;
   
+  protected final double[] binCountScaleFactors;
+  protected final double[] weightedMinimumDepths;
+  
+  protected Double cachedValue = null;
+  
   /** Constructor. */
   public PriorityCalculator(ReprioritizationTracker rt, IRepositoryConnection connection,
String[] documentBins)
+    throws ManifoldCFException
   {
     this.connection = connection;
     this.binNames = documentBins;
     this.rt = rt;
-  }
-
-  /** Calculate a document priority value.  Priorities are reversed, and in log space, so
that
-  * zero (0.0) is considered the highest possible priority, and larger priority values are
considered lower in actual priority.
-  *@param binNames are the global bins to which the document belongs.
-  *@param connection is the connection, from which the throttles may be obtained.  More highly
throttled connections are given
-  *          less favorable priority.
-  *@return the priority value, based on recent history.  Also updates statistics atomically.
-  */
-  @Override
-  public double getDocumentPriority()
-    throws ManifoldCFException
-  {
+    
+    // Now, precompute the weightedMinimumDepths etc; we'll need it whether we preload or
not.
+    
+    // For each bin, we will be calculating the bin count scale factor, which is what we
multiply the bincount by to adjust for the
+    // throttling on that bin.
+    binCountScaleFactors = new double[binNames.length];
+    weightedMinimumDepths = new double[binNames.length];
 
     // NOTE: We must be sure to adjust the return value by the factor calculated due to performance;
a slower throttle rate
     // should yield a lower priority.  In theory it should be possible to calculate an adjusted
priority pretty exactly,
@@ -85,11 +85,6 @@ public class PriorityCalculator implemen
     // also when resetting the bin counts.
     double[] maxFetchRates = calculateMaxFetchRates(binNames,connection);
 
-    // For each bin, we will be calculating the bin count scale factor, which is what we
multiply the bincount by to adjust for the
-    // throttling on that bin.
-    double[] binCountScaleFactors = new double[binNames.length];
-    double[] weightedMinimumDepths = new double[binNames.length];
-
     // Before calculating priority, calculate some factors that will allow us to determine
the proper starting value for a bin.
     double currentMinimumDepth = rt.getMinimumDepth();
 
@@ -109,6 +104,34 @@ public class PriorityCalculator implemen
       binCountScaleFactors[i] = binCountScaleFactor;
       weightedMinimumDepths[i] = currentMinimumDepth / binCountScaleFactor;
     }
+    
+  }
+  
+  /** Log a preload request for this priority value: registers one request with the ReprioritizationTracker for each of this calculator's bins.
+  */
+  public void makePreloadRequest()
+  {
+    for (int i = 0; i < binNames.length; i++)
+    {
+      String binName = binNames[i];
+      rt.addPreloadRequest(binName, weightedMinimumDepths[i]); // per-bin depth was precomputed in the constructor
+    }
+
+  }
+
+  /** Calculate a document priority value.  Priorities are reversed, and in log space, so
that
+  * zero (0.0) is considered the highest possible priority, and larger priority values are
considered lower in actual priority.
+  *@param binNames are the global bins to which the document belongs.
+  *@param connection is the connection, from which the throttles may be obtained.  More highly
throttled connections are given
+  *          less favorable priority.
+  *@return the priority value, based on recent history.  Also updates statistics atomically.
+  */
+  @Override
+  public double getDocumentPriority()
+    throws ManifoldCFException
+  {
+     if (cachedValue != null)
+       return cachedValue.doubleValue();
 
     double highestAdjustedCount = 0.0;
     // Find the bin with the largest effective count, and use that for the document's priority.
@@ -149,6 +172,7 @@ public class PriorityCalculator implemen
       Logging.scheduling.debug("Document with bins ["+sb.toString()+"] given priority value
"+new Double(returnValue).toString());
     }
 
+    cachedValue = new Double(returnValue);
 
     return returnValue;
   }

Modified: manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ReprioritizationTracker.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ReprioritizationTracker.java?rev=1546549&r1=1546548&r2=1546549&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ReprioritizationTracker.java
(original)
+++ manifoldcf/branches/CONNECTORS-781/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ReprioritizationTracker.java
Fri Nov 29 12:26:53 2013
@@ -42,7 +42,12 @@ public class ReprioritizationTracker
   /** Lock manager */
   protected final ILockManager lockManager;
   protected final IBinManager binManager;
-  
+
+  /** Preload requests */
+  protected final Map<String,PreloadRequest> preloadRequests = new HashMap<String,PreloadRequest>();
+  /** Preload values */
+  protected final Map<String,PreloadedValues> preloadedValues = new HashMap<String,PreloadedValues>();
+    
   /** Constructor.
   */
   public ReprioritizationTracker(IThreadContext threadContext)
@@ -247,6 +252,50 @@ public class ReprioritizationTracker
     }
   }
   
+  /** Note preload amounts: counts one more pending request for the named bin and keeps the largest weighted minimum depth requested for it so far.
+  */
+  public void addPreloadRequest(String binName, double weightedMinimumDepth)
+  {
+    PreloadRequest pr = preloadRequests.get(binName);
+    if (pr == null)
+    {
+      pr = new PreloadRequest(weightedMinimumDepth); // first request for this bin; its count starts at 1
+      preloadRequests.put(binName,pr);
+    }
+    else
+      pr.updateRequest(weightedMinimumDepth); // raises the stored depth if needed and increments the count
+  }
+  
+  
+  /** Preload bin values.  Call this OUTSIDE of a transaction.  For every bin with pending requests, asks the bin manager for the requested number of values, stores them for getIncrementBinValue() to hand out, then clears the pending requests.
+  * NOTE(review): the bin manager method invoked is named "...InTransaction" although this comment says to call OUTSIDE of a transaction; confirm which is intended. */
+  public void preloadBinValues()
+    throws ManifoldCFException
+  {
+    for (String binName : preloadRequests.keySet())
+    {
+      PreloadRequest pr = preloadRequests.get(binName);
+      double[] newValues = binManager.getIncrementBinValuesInTransaction(binName, pr.getWeightedMinimumDepth(),
pr.getRequestCount());
+      PreloadedValues pv = new PreloadedValues(newValues);
+      preloadedValues.put(binName,pv); // replaces any values previously preloaded for this bin
+    }
+    preloadRequests.clear();
+  }
+  
+  /** Clear any preload requests: discards every pending request noted via addPreloadRequest() without fetching values for them.
+  */
+  public void clearPreloadRequests()
+  {
+    preloadRequests.clear();
+  }
+  
+  /** Clear remaining preloaded values: afterwards getIncrementBinValue() finds nothing preloaded and goes to the bin manager for every bin.
+  */
+  public void clearPreloadedValues()
+  {
+    preloadedValues.clear();
+  }
+
   /** Get a bin value.
   *@param binName is the bin name.
   *@param weightedMinimumDepth is the minimum depth to use.
@@ -255,6 +304,13 @@ public class ReprioritizationTracker
   public double getIncrementBinValue(String binName, double weightedMinimumDepth)
     throws ManifoldCFException
   {
+    PreloadedValues pv = preloadedValues.get(binName); // prefer a value fetched earlier by preloadBinValues()
+    if (pv != null)
+    {
+      Double rval = pv.getNextValue(); // null once this bin's preloaded values are used up
+      if (rval != null)
+        return rval.doubleValue();
+    }
     return binManager.getIncrementBinValues(binName, weightedMinimumDepth,1)[0];
   }
   
@@ -428,5 +484,56 @@ public class ReprioritizationTracker
     lockManager.writeData(trackerMinimumDepthResource,data);
   }
   
+  /** A preload request: accumulates, for one bin, how many values have been requested and the largest weighted minimum depth among those requests. */
+  protected static class PreloadRequest
+  {
+    protected double weightedMinimumDepth; // largest depth seen across all requests so far
+    protected int requestCount; // number of requests folded into this object
+    
+    public PreloadRequest(double weightedMinimumDepth)
+    {
+      this.weightedMinimumDepth = weightedMinimumDepth;
+      this.requestCount = 1; // constructing the object counts as the first request
+    }
+    
+    public void updateRequest(double weightedMinimumDepth)
+    {
+      if (this.weightedMinimumDepth < weightedMinimumDepth) // keep the maximum depth
+        this.weightedMinimumDepth = weightedMinimumDepth;
+      requestCount++;
+    }
+    
+    public double getWeightedMinimumDepth()
+    {
+      return weightedMinimumDepth;
+    }
+    
+    public int getRequestCount()
+    {
+      return requestCount;
+    }
+  }
+  
+  /** A set of preloaded values, handed out one at a time in array order until none remain. */
+  protected static class PreloadedValues
+  {
+    protected double[] values;
+    protected int valueIndex; // index of the next value to hand out
+    
+    public PreloadedValues(double[] values)
+    {
+      this.values = values;
+      this.valueIndex = 0; // start at the first value (previously a no-op self-assignment of the field)
+    }
+    
+    public Double getNextValue() // returns null once every value has been handed out
+    {
+      if (valueIndex == values.length)
+        return null;
+      return new Double(values[valueIndex++]);
+    }
+  }
+  
+
 }
 



Mime
View raw message