incubator-couchdb-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Dirkjan Ochtman <dirk...@ochtman.nl>
Subject Complex view
Date Sat, 22 May 2010 09:08:27 GMT
Hi there,

I've been trying to wrap my head around a fairly complex view this
week. So far, I haven't been successful, and I'm not really sure
CouchDB can do what I'm trying to do here. I'd appreciate it if people
review my code/ideas -- I'd like to either solve this problem or
understand why it can't work. Working on this thing has enhanced my
understanding of map-reduce, but I'd like to get a better feel for
where the limits of the system are.

On to the problem: the basic problem is that I have three types of
documents, and I want to pick a value from each type for some common
key and aggregate that. Specifically, I want the sum over a range of
objects for a multiplication of the three values. Here's some example
data:

{"type": "a", "date": "2010-05-18", "object": "aaa", "range": 3.5},
{"type": "b", "date": "2010-05-18", "object": "aaa", "value": 2},
{"type": "c", "date": "2010-05-18", "object": "aaa", "moved": 0.5},

However, I don't have all three types for all objects. For those
objects, I don't want to count any data. For example:

{"type": "a", "date": "2010-05-18", "object": "bbb", "range": 4.5},
{"type": "b", "date": "2010-05-18", "object": "bbb", "value": 10},

For these five documents, the value I want is 3.5 (3.5 * 2 * 0.5).
Here's my map function:

/**
 * Map step: emits one row per document of type 'a', 'b' or 'c'.
 *
 * The emitted key is the document's date split on '-' followed by the
 * document's object and a numeric slot (0 for 'a', 1 for 'b', 2 for 'c').
 * The emitted value is the type-specific field: range, value or moved.
 * Documents of any other type emit nothing.
 *
 * @param {Object} doc - the document being indexed
 */
function map(doc) {
  // [type tag, field to emit]; the position in this list is the key's slot.
  var kinds = [['a', 'range'], ['b', 'value'], ['c', 'moved']];
  for (var slot = 0; slot < kinds.length; slot++) {
    if (doc.type == kinds[slot][0]) {
      emit(doc.date.split('-').concat([doc.object, slot]), doc[kinds[slot][1]]);
      return;
    }
  }
}

resulting in these map results for the documents above:

["2010", "05", "18", "aaa", 0] = 3.5
["2010", "05", "18", "aaa", 1] = 2
["2010", "05", "18", "aaa", 2] = 0.5
["2010", "05", "18", "bbb", 0] = 4.5
["2010", "05", "18", "bbb", 1] = 10

That part is fairly straightforward. However, the reduce phase is what
gets me into trouble. On the one hand, I want to discard all the
incomplete triples (for example, with "object": "bbb" above). On the
other hand, the reduce range start or end might fall between ["aaa",
1] and ["aaa", 2], so that some set that is incomplete in this reduce
might still be completed in a higher-up rereduce. But, if I pass up
all the incomplete sets, the reduce result grows unbounded, which
doesn't work. So I was assuming that reduce gets a contiguous range of
keys (and values), so that I could assume that only the first and last
sets passed in were eligible for completion at a later rereduce stage
(sewing together the seams at rereduce time). Here's the code:

function(key, values, rereduce) {

  if (rereduce) {
    var red = 0.0;
    for (var i in values) {

      i = parseInt(i);
      red += values[i][0];
      if (i == values.length - 1) continue;
      if (!(values[i][2] && values[i + 1][1])) continue;
      if (values[i][2][0] != values[i + 1][1][0]) continue;

      // if right-side of current and left-side of next match up
      // and gets to three elements combined, use the result:

      var cur = values[i][2][1];
      for (var j in values[i + 1][1][1]) {
        cur[j] = values[i + 1][1][1][j];
      }
      if (cur[0] && cur[1] && cur[2]) {
        red += cur[0] * cur[1] * cur[2];
      }

    }
    return [red, values[0][1], values[values.length - 1][2]];
  }

  // combine results for all the different objects

  var objs = {};
  for (var i in key) {
    var c = key[i][0][3];
    if (!objs[c]) {
      objs[c] = {};
    }
    objs[c][key[i][0][4]] = values[i];
  }

  // sum products of values for complete sets
  // pass up left-most and right-most sets if incomplete

  var fn = key[0][0][3];
  var first = null;
  var ln = key[key.length - 1][0][3];
  var last = null;
  var value = 0.0;
  for (var i in objs) {
    if (objs[i][0] && objs[i][1] && objs[i][2]) {
      value += objs[i][0] * objs[i][1] * objs[i][2];
    } else if (i == fn) {
      first = [i, objs[i]];
    } else if (i == ln) {
      last = [i, objs[i]];
    }
  }

  return [value, first, last];

}

However, this doesn't work. I'm not exactly sure why... It could
certainly be that my assumption about reduce keys being contiguous is
wrong, but that just makes me wonder why. It seems like this reduce
function is still commutative and associative, as is required. Any
help much appreciated!

Cheers,

Dirkjan

Mime
View raw message