couchdb-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Randall Leeds <randall.le...@gmail.com>
Subject Re: Map reduce and weird output question
Date Sun, 07 Mar 2010 23:28:14 GMT
I'm not an expert on this, but I think you need to create your own
reduce function and output the number of keys rather than the sum of
the values.

On Sun, Mar 7, 2010 at 15:15, Gregory Tappero <coulix@gmail.com> wrote:
> Thank you Pawel,
>
> If I try to follow your way, it gives me the count of docs in a given
> day for each username; what I would like is the count of unique
> usernames for a given day.
>
> function(doc) {
>
>    if (doc.doc_type=="EdoPing" && doc.em_type==0) {
>        date = new Date().setRFC3339(doc.created_at);
>        emit([date.getFullYear(), parseInt(date.getMonth())+1,
> date.getDate(), doc.em_uname] , 1);
>
>    }
> }
>
> Reduce:
>  _count
>
> =================
> I get:
>
> [2010, 3, 3, "student1"]         5
> [2010, 3, 4, "student1"]         18
> [2010, 3, 5, "eong"]             77
> [2010, 3, 6, "bkante"]           71
> [2010, 3, 6, "jfrancillette"]    72
> [2010, 3, 6, "mlouviers"]        12
> [2010, 3, 7, "student1"]         4
>
> I would like to extract the following
>
> [2010, 3, 3]       1
> [2010, 3, 4]       1
> [2010, 3, 5]    1
> [2010, 3, 6]       3
> [2010, 3, 7]       1
>
>
> If I do a group_level=3 it sums the values.
>
> {"key":[2010,3,3],"value":5},
> {"key":[2010,3,4],"value":18},
> {"key":[2010,3,5],"value":77},
> {"key":[2010,3,6],"value":155},
> {"key":[2010,3,7],"value":4}
>
> How can I count the unique emitter usernames per day?
>
>
>
>
> On Sun, Mar 7, 2010 at 10:02 PM, Paweł Stawicki <pawelstawicki@gmail.com> wrote:
>> Just emit all documents with em_type = 0 in map function, with [date,
>> em_uname] as key. Then count in reduce.
>>
>> Map:
>> function(doc) {
>>  if (doc.em_type = 0) {
>>    //If you only want to count, you can emit anything (e.g. 1) instead of
>> doc here.
>>    emit([date, em_uname], doc);
>>  }
>> }
>>
>> Reduce:
>> function(keys, values, rereduce) {
>>  if (!rereduce) {
>>    return count_of_values;
>>  } else {
>>    return sum_of_values;
>>  }
>>
>>  //If you return 1 from emit instead of doc, then count_of_values ==
>> sum_of_values
>> }
>>
>> Then you can handle everything by grouping:
>> http://yourserver:5984/yourdb/_view/yourview?group_level=2
>> or group=true
>>
>> Regards
>> --
>> Paweł Stawicki
>> http://pawelstawicki.blogspot.com
>> http://szczecin.jug.pl
>>
>>
>>
>> On Sat, Mar 6, 2010 at 16:26, Gregory Tappero <coulix@gmail.com> wrote:
>>
>>> Hello everyone,
>>>
>>> I have documents of the following EdoPing type:
>>>
>>> {
>>>   "_id": "22add509c1e7bc286832edc5bfe99ce5",
>>>   "_rev": "1-49663ab8778f445e481143120d0d7086",
>>>   "doc_type": "EdoPing",
>>>   "em_uname": "student1",
>>>   "em_gid": 1,
>>>   "created_at": "2010-03-03T14:18:19Z",
>>>   "em_ip": "92.154.70.148",
>>>   "em_type": 0,
>>>   "room_url": "z2fudcvcrfa3reaydatre",
>>>   "room_users": [
>>>       "tutorsbox"
>>>   ]
>>> }
>>>
>>> I would like to count all unique em_uname values of em_type 0 on a given date.
>>>
>>> For now i used this map/reduce
>>> http://friendpaste.com/5xUUQ26bbl9d5KRB8eojwe
>>>
>>> Date.prototype.setRFC3339 = function(dString){
>>>    var regexp =
>>>
>>> /(\d\d\d\d)(-)?(\d\d)(-)?(\d\d)(T)?(\d\d)(:)?(\d\d)(:)?(\d\d)(\.\d+)?(Z|([+-])(\d\d)(:)?(\d\d))/;
>>>
>>>    if (dString.toString().match(new RegExp(regexp))) {
>>>        var d = dString.match(new RegExp(regexp));
>>>        var offset = 0;
>>>
>>>        this.setUTCDate(1);
>>>        this.setUTCFullYear(parseInt(d[1],10));
>>>        this.setUTCMonth(parseInt(d[3],10) - 1);
>>>        this.setUTCDate(parseInt(d[5],10));
>>>        this.setUTCHours(parseInt(d[7],10));
>>>        this.setUTCMinutes(parseInt(d[9],10));
>>>        this.setUTCSeconds(parseInt(d[11],10));
>>>        if (d[12])
>>>            this.setUTCMilliseconds(parseFloat(d[12]) * 1000);
>>>        else
>>>            this.setUTCMilliseconds(0);
>>>        if (d[13] != 'Z') {
>>>            offset = (d[15] * 60) + parseInt(d[17],10);
>>>            offset *= ((d[14] == '-') ? -1 : 1);
>>>            this.setTime(this.getTime() - offset * 60 * 1000);
>>>        }
>>>    } else {
>>>        this.setTime(Date.parse(dString));
>>>    }
>>>    return this;
>>> };
>>>
>>> var seenKeys = new Array();
>>>
>>> function(doc) {
>>>
>>>
>>>    if (doc.doc_type=="EdoPing" && doc.em_type==0) {
>>>        date = new Date().setRFC3339(doc.created_at);
>>>        var key = doc.em_uname + String(doc.created_at).substring(0,10);
>>>        if (seenKeys[key] ==  undefined  ) {
>>>            seenKeys[key] = 1;
>>>            emit([date.getFullYear(), parseInt(date.getMonth())+1,
>>> date.getDate() ] , 1);
>>>         }
>>>    }
>>> }
>>>
>>>
>>> It works when saved for the first time, but as soon as new EdoPings
>>> get added it starts emitting rows it has already seen (same key),
>>> creating faulty count results.
>>>
>>> Is it OK to have seenKeys outside of the doc function()?
>>> What other way could I use to get the same results?
>>>
>>> Thanks,
>>>
>>> Greg
>>>
>>
>
>
>
> --
> Greg Tappero
> CTO co founder Edoboard
> http://www.edoboard.com
> +33 0645764425
>

Mime
View raw message