incubator-couchdb-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Chris Anderson <jch...@apache.org>
Subject Re: calculate doc popularity with couchdb map/reduce
Date Wed, 10 Jun 2009 15:55:18 GMT
On Wed, Jun 10, 2009 at 6:06 AM, Benoit Chesneau<bchesneau@gmail.com> wrote:
> I improved the map/reduce code a little but still get the same error. I
> don't know where I am going wrong at this point. If anyone has an idea,
> please tell me.
>

It looks like you are building up a hash of scores in your reduce. Can
you get the same results by keeping the reduce ignorant of keys, and
just calculating the score for whatever keys are in the range? Then
you can use a group query and sort by value on the client.

Anything involving relative popularity of certain items will have a
sorting phase outside of CouchDB's first pass of map reduce.

> - benoît
>
> map :
> function(doc) {
>  // !code vendor/couchapp/date.js
>
>  if (doc.type == "vote") {
>    var d = new Date().setRFC3339(doc.d).valueOf();
>    emit(doc.itemid, {d: d, v: doc.v});
>  }
> }
>
> reduce :
> function(keys, values, rereduce) {
>  /* we use reddit algorithm to calculate scores
>  http://blog.linkibol.com/post/How-to-Build-a-Popularity-Algorithm-You-can-be-Proud-of.aspx
>  http://news.ycombinator.com/item?id=231168 */
>
>  // all started at this date
>
>
>  function score(A, B, x) {
>    var t = A - B;
>    if (x  > 0) {
>      y = 1;
>    } else if (x  == 0) {
>      y = 0;
>    } else {
>      y = -1;
>    }
>    z = (Math.abs(x) >=1 && Math.abs(x) || 1);
>    log([t, x, y, z, Math.log(z) + (y*t)/45000])
>    return (Math.log(z) + (y*t)/45000);
>  }
>
>
>  var scores = {};
>  var points = {};
>  if (!rereduce) {
>    // Thu May 28 11:16:49 2009 +0200
>    var B = 1243502209000;
>    var oldkey = "";
>    for (var k in keys) {
>      if (oldkey != keys[k][0] && points[oldkey]) {
>        scores[oldkey] = score(points[oldkey].A, B, points[oldkey].x )
>      }
>
>      if (!points[keys[k][0]]) {
>        oldkey = keys[k][0];
>        points[keys[k][0]] = {
>          A: values[k].d,
>          x: 0
>        }
>      }
>      points[keys[k][0]].x += values[k].v;
>
>    }
>    lastkey = keys[keys.length-1][0];
>    scores[lastkey] = score(points[lastkey].A, B, points[lastkey].x )
>    log(scores);
>  } else {
>    scores = values[0];
>    for(var v = 1; v < values.length; v++) {
>      for(var s in values[v]) {
>        if (scores[s])
>          scores[s] += values[v][s];
>        else
>          scores[s] = values[v][s];
>      }
>    }
>
>  }
>
>  var top = [];
>  for (k in scores)
>    top.push([k, scores[k]]);
>  top.sort(function(a, b) { return a[1] - b[1] });
>
>  for(var n = 240; n < top.length; n++)
>    if(top[n][0] != lastkey) scores[top[n][0]] = undefined;
>
>  return scores;
> }
>



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Mime
View raw message