couchdb-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Benoit Chesneau <bchesn...@gmail.com>
Subject calculate doc popularity with couchdb map/reduce
Date Wed, 10 Jun 2009 10:25:41 GMT
Hi all,

I'm trying to calculate the popularity of a doc with couchdb, but for now
I get a "reduce_overflow_error" error. What I do is save each vote as a
doc:

{
  docid: id of document where vote happened,
  v :  value of doc,
  d: date when docid was created

}

Then I do map/reduce over it :

map :
function(doc) {
  if (doc.type == "vote") {
    emit(doc.itemid, {d: doc.d, v: doc.v});
  }
}

reduce :
function(keys, values, rereduce) {
  // !code vendor/couchapp/date.js

  /* we use reddit algorithm to calculate scores
  http://blog.linkibol.com/post/How-to-Build-a-Popularity-Algorithm-You-can-be-Proud-of.aspx
  http://news.ycombinator.com/item?id=231168 */

  // all started at this date

  var scores = {};
  var points = {};
  if (!rereduce) {
    var B = new Date("Thu May 28 11:16:49 2009 +0200").valueOf();
    for (var k in keys) {
      if (!scores[keys[k][0]]) {
        scores[keys[k][0]] = 0;
        points[keys[k][0]] = {
          A: new Date().setRFC3339(values[k].d).valueOf(),
          x: 0
        }
      }

      var t = points[keys[k][0]].A - B;
      points[keys[k][0]].x += values[k].v;

      if (points[keys[k][0]].x  > 0) {
        y = 1;
      } else if (points[keys[k][0]].x  == 0) {
        y = 0;
      } else {
        y = -1;
      }

      z = (Math.abs(points[keys[k][0]].x) >=1 && points[keys[k][0]].x  || 1);
      scores[keys[k][0]] = Math.log(z) + (y*t)/45000;

    }
    lastkey = keys[keys.length-1][0];

  } else {
    scores = values[0];
    for(var v = 1; v < values.length; v++) {
      for(var s in values[v]) {
        if (scores[s])
          scores[s] += values[v][s];
        else
          scores[s] = values[v][s];
      }
    }
  }

  var top = [];
  for (k in scores)
    top[top.length] = [k, scores[k]];
  top.sort(function(a, b) { return a[1] - b[1] });

  for(var n = 20; n < top.length; n++)
    if(top[n][0] != lastkey) scores[top[n][0]] = undefined;

  return scores;
}

Is this kind of reduce OK with couchdb? The other solution would be
saving scores without sorting them in a temporary db and running another
map/reduce over it. Maybe I should go that way, but I wonder if
someone has a better idea?

- benoît

Mime
View raw message