mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ssla...@apache.org
Subject svn commit: r1513632 - in /mahout/trunk: CHANGELOG core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Date Tue, 13 Aug 2013 20:08:17 GMT
Author: sslavic
Date: Tue Aug 13 20:08:16 2013
New Revision: 1513632

URL: http://svn.apache.org/r1513632
Log:
MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob downsampling code where
precision should have been retained

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1513632&r1=1513631&r2=1513632&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Tue Aug 13 20:08:16 2013
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.9 - unreleased
 
+  MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob downsampling code where precision should have been retained (sslavic)
+
   MAHOUT-1301: toString() method of SequentialAccessSparseVector has excess comma at the end (Alexander Senov, smarthi)
 
   MAHOUT-1296: Remove deprecated algorithms (ssc)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1513632&r1=1513631&r2=1513632&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Tue Aug 13 20:08:16 2013
@@ -288,7 +288,7 @@ public class RowSimilarityJob extends Ab
     private Vector sampleDown(Vector rowVector, Context ctx) {
 
       int observationsPerRow = rowVector.getNumNondefaultElements();
-      double rowSampleRate = Math.min(maxObservationsPerRow, observationsPerRow) / observationsPerRow;
+      double rowSampleRate = (double) Math.min(maxObservationsPerRow, observationsPerRow) / (double) observationsPerRow;
 
       Vector downsampledRow = rowVector.like();
       long usedObservations = 0;
@@ -297,7 +297,7 @@ public class RowSimilarityJob extends Ab
       for (Vector.Element elem : rowVector.nonZeroes()) {
 
         int columnCount = observationsPerColumn.get(elem.index());
-        double columnSampleRate = Math.min(maxObservationsPerColumn, columnCount) / columnCount;
+        double columnSampleRate = (double) Math.min(maxObservationsPerColumn, columnCount) / (double) columnCount;
 
         if (random.nextDouble() <= Math.min(rowSampleRate, columnSampleRate)) {
           downsampledRow.setQuick(elem.index(), elem.get());



Mime
View raw message