couchdb-commits mailing list archives

From Apache Wiki <wikidi...@apache.org>
Subject [Couchdb Wiki] Update of "View_Snippets" by SebastianCohnen
Date Sun, 02 May 2010 18:18:12 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Couchdb Wiki" for change notification.

The "View_Snippets" page has been changed by SebastianCohnen.
The comment on this change is: added syntax-hl; added toc; fixed example indentation; cleanup.
http://wiki.apache.org/couchdb/View_Snippets?action=diff&rev1=36&rev2=37

--------------------------------------------------

  
  == Get docs with a particular user id ==
  
- {{{
+ {{{#!highlight javascript
+ // map
- map: function(doc) {
+ function(doc) {
    if (doc.user_id) {
      emit(doc.user_id, null);
    }
  }
  }}}
  
- Then query with key=USER_ID to get all the rows that match that user.
+ Then query with ''key=USER_ID'' to get all the rows that match that user.
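+ 
+ For example, if this view were saved as ''by_user_id'' in a design document named ''users'' (database, design document and view names here are only placeholders), the query would look like the following; note that string keys are JSON encoded, hence the quotes:
+ 
+ {{{
+ GET /dbname/_design/users/_view/by_user_id?key="USER_ID"
+ }}}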
  
  
  == Get all documents which have an attachment ==
  
  This lists only the documents which have an attachment.
  
- {{{
+ {{{#!highlight javascript
+ // map
- map: function(doc) {
+ function(doc) {
    if (doc._attachments) {
      emit(doc._id, null);
    }
@@ -45, +47 @@

  
  Call this with ''group=true'' or you only get the combined number of documents with and without attachments.
  
- {{{
+ {{{#!highlight javascript
+ // map
- map: function(doc) {
+ function(doc) {
    if (doc._attachments) {
      emit("with attachment", 1);
    }
@@ -54, +57 @@

      emit("without attachment", 1); 
    }
  }
+ }}}
+ 
+ {{{#!highlight javascript
+ // reduce
- reduce: function(keys, values) {
+ function(keys, values) {
     return sum(values);
  }
  }}}
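+ 
+ With ''group=true'' the view returns one row per key; the counts below are only illustrative:
+ 
+ {{{#!highlight javascript
+ {"rows":[
+ {"key":"with attachment","value":12},
+ {"key":"without attachment","value":30}
+ ]}
+ }}}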
@@ -73, +80 @@

  
  == Generating a list of unique values ==
  
- Here we use the fact that the key for a view result can be an array. Suppose you have a map that generates (key, value) pairs with many duplicates and you want to remove the duplicates. To do so, use ([key, value], null) as the map output.
+ Here we use the fact that the key for a view result can be an array. Suppose you have a map that generates (key, value) pairs with many duplicates and you want to remove the duplicates. To do so, use {{{emit([key, value], null)}}} as the map output.
  
  Call this with ''group=true'' or you only get ''null''.
  
- {{{
+ {{{#!highlight javascript
+ // map
- map: function(doc) {
+ function(doc) {
   for (var i in doc.links) {
      emit([doc.parent, i], null);
    }
  }
+ }}}
+ 
+ {{{#!highlight javascript
+ // reduce
- reduce: function(keys, values) {
+ function(keys, values) {
     return null;
  }
  }}}
  
  This will give you results like
- {{{
+ {{{#!highlight javascript
  {"rows":[
  {"key":["thisparent","thatlink"],"value":null},
  {"key":["thisparent","thatotherlink"],"value":null}
@@ -100, +112 @@

  
  Note that the trick here is using the key for what you want to make unique. You can combine this with the counting above to get a count of duplicate values:
  
- {{{
+ {{{#!highlight javascript
+ // map
- map: function(doc) {
+ function(doc) {
   for (var i in doc.links) {
      emit([doc.parent, i], 1);
    }
  }
+ }}}
+ 
+ {{{#!highlight javascript
+ // reduce
- reduce: function(keys, values) {
+ function(keys, values) {
     return sum(values);
  }
  }}}
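+ 
+ With ''group=true'' each distinct ''[parent, link]'' pair then shows up once, with its number of occurrences as the value (again, illustrative counts):
+ 
+ {{{#!highlight javascript
+ {"rows":[
+ {"key":["thisparent","thatlink"],"value":3},
+ {"key":["thisparent","thatotherlink"],"value":1}
+ ]}
+ }}}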
@@ -121, +138 @@

  
  Use a standard counting emit function:
  
- {{{
+ {{{#!highlight javascript
- function(doc)
+ function(doc) {
- {
-     for(var idx in doc.tags)
+     for(var idx in doc.tags) {
-     {
          emit(doc.tags[idx], 1);
      }
  }
@@ -133, +148 @@

  
  Notice that `MAX` is the number of tags to return. Technically this snippet relies on an implementation artifact, namely that CouchDB sends keys to the reduce function in sorted order, so it would break subtly if this stopped being true. Buyer beware!
  
- {{{
+ {{{#!highlight javascript
  function(keys, values, rereduce)
  {
      var MAX = 3;
@@ -218, +233 @@

  
  Here is a modified example from the [[View_collation|View collation]] page.  Note that `group_level` needs to be set to `1` for it to return a meaningful `customer_details`.
  
- {{{
+ {{{#!highlight javascript
  // Map function
  function(doc) {
    if (doc.Type == "customer") {
@@ -254, +269 @@

  == Computing the standard deviation ==
  This example is from the couchdb test-suite. It is '''much''' easier and less complex than the following example ([[#summary_stats|Computing simple summary statistics (min,max,mean,standard deviation)]]), although it does not calculate min, max and mean (but this should be an easy exercise).
  
- {{{
+ {{{#!highlight javascript
  // Map
  function (doc) {
    emit(doc.val, doc.val)
- };
+ }
  }}}
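+ 
+ The reduce from the test-suite follows below. As a simpler, numerically naive sketch (not the test-suite code), the standard deviation can also be derived from a running count, sum and sum of squares, using ''stddev = sqrt(sumsqr/count - (sum/count)^2)'':
+ 
+ {{{#!highlight javascript
+ // Reduce (sketch)
+ function (keys, values, rereduce) {
+   var acc = {count: 0, sum: 0, sumsqr: 0};
+   for (var i = 0; i < values.length; i++) {
+     var v = values[i];
+     if (rereduce) {
+       // v is a partial accumulator returned by an earlier reduce pass
+       acc.count += v.count;
+       acc.sum += v.sum;
+       acc.sumsqr += v.sumsqr;
+     } else {
+       // v is a number emitted by the map function above
+       acc.count += 1;
+       acc.sum += v;
+       acc.sumsqr += v * v;
+     }
+   }
+   acc.stddev = Math.sqrt(acc.sumsqr / acc.count - Math.pow(acc.sum / acc.count, 2));
+   return acc;
+ }
+ }}}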
  
- {{{
+ {{{#!highlight javascript
  // Reduce
  function (keys, values, rereduce) {
      // This computes the standard deviation of the mapped results
@@ -309, +324 @@

  
  Note that the view is specialized to my dataset, but the reduce function is written to be fairly generic.  I kept the view as is because I'm too lazy to write up a generic view, and also because when I wrote it I wasn't sure one could use Date, Math, and Reg``Exp in Couch``DB Java``Script.
  
- {{{
+ {{{#!highlight javascript
  // Map function
  function(doc) {
      var risk_exponent = 
- 	-3.194 +
+     -3.194 +
- 	doc.CV_VOLOCC_1                 *1.080 +
+     doc.CV_VOLOCC_1                 *1.080 +
- 	doc.CV_VOLOCC_M                 *0.627 +
+     doc.CV_VOLOCC_M                 *0.627 +
- 	doc.CV_VOLOCC_R                 *0.553 +
+     doc.CV_VOLOCC_R                 *0.553 +
- 	doc.CORR_VOLOCC_1M              *1.439 +
+     doc.CORR_VOLOCC_1M              *1.439 +
- 	doc.CORR_VOLOCC_MR              *0.658 +
+     doc.CORR_VOLOCC_MR              *0.658 +
- 	doc.LAG1_OCC_M                  *0.412 +
+     doc.LAG1_OCC_M                  *0.412 +
- 	doc.LAG1_OCC_R                  *1.424 +
+     doc.LAG1_OCC_R                  *1.424 +
- 	doc.MU_VOL_1                    *0.038 +
+     doc.MU_VOL_1                    *0.038 +
- 	doc.MU_VOL_M                    *0.100 +
+     doc.MU_VOL_M                    *0.100 +
- 	doc["CORR_OCC_1M X MU_VOL_M"]      *-0.168 +
+     doc["CORR_OCC_1M X MU_VOL_M"]      *-0.168 +
- 	doc["CORR_OCC_1M X SD_VOL_R" ]     *0.479 +
+     doc["CORR_OCC_1M X SD_VOL_R" ]     *0.479 +
- 	doc["CORR_OCC_1M X LAG1_OCC_R"]    *-1.462 ;
+     doc["CORR_OCC_1M X LAG1_OCC_R"]    *-1.462 ;
      
      var risk = Math.exp(risk_exponent);
      
@@ -386, +401 @@

  
      */
      function combine_S(partitionA,partitionB){
- 	var NewS=partitionA.S;
+     var NewS=partitionA.S;
- 	var NewSum=partitionA.Sum;
+     var NewSum=partitionA.Sum;
- 	var min = partitionA.min;
+     var min = partitionA.min;
- 	var max = partitionA.max;
+     var max = partitionA.max;
- 	var M = partitionB.M;
+     var M = partitionB.M;
- 	if(!M){M=0;}
+     if(!M){M=0;}
+     if(M){
- 	if(M){
- 	    var diff = 
- 		((partitionA.M * partitionB.Sum / partitionB.M) - partitionA.Sum );
+       var diff = ((partitionA.M * partitionB.Sum / partitionB.M) - partitionA.Sum );
- 	    
+     
- 	    NewS += partitionB.S + partitionB.M*diff*diff/(partitionA.M * (partitionA.M+partitionB.M) );
+     NewS += partitionB.S + partitionB.M*diff*diff/(partitionA.M * (partitionA.M+partitionB.M) );
- 	    NewSum += partitionB.Sum ;
+     NewSum += partitionB.Sum ;
  
- 	    min = Math.min(partitionB.min, min);
+     min = Math.min(partitionB.min, min);
- 	    max = Math.max(partitionB.max, max);
+     max = Math.max(partitionB.max, max);
- 	}
+     }
- 	return {'S':NewS,'Sum':NewSum, 'M': partitionA.M+M, 'min':min, 'max':max };
+     return {'S':NewS,'Sum':NewSum, 'M': partitionA.M+M, 'min':min, 'max':max };
-     }
- 	    
+     }
+ 
  
      /*
  
@@ -456, +470 @@

  
      */
      function pairwise_update (values, M, Sum, S, min, max, key){
- 	if(!key){key='risk';}
+   if(!key){key='risk';}
- 	if(!Sum){Sum = 0; S = 0; M=0;}
+   if(!Sum){Sum = 0; S = 0; M=0;}
- 	if(!S){Sum = 0; S = 0; M=0;}
+   if(!S){Sum = 0; S = 0; M=0;}
- 	if(!M){Sum = 0; S = 0; M=0;}
+   if(!M){Sum = 0; S = 0; M=0;}
- 	if(!min){ min = Infinity; }
+   if(!min){ min = Infinity; }
- 	if(!max){ max = -Infinity; }
+   if(!max){ max = -Infinity; }
- 	var T;
+   var T;
- 	var stack_ptr=1;
+   var stack_ptr=1;
- 	var N = values.length;
+   var N = values.length;
- 	var half = Math.floor(N/2);
+   var half = Math.floor(N/2);
- 	var NewSum;
+   var NewSum;
- 	var NewS ;
+   var NewS ;
- 	var SumA=[];
+   var SumA=[];
- 	var SA=[];
+   var SA=[];
- 	var Terms=[];
+   var Terms=[];
- 	Terms[0]=0;
+   Terms[0]=0;
- 	if(N == 1){
+   if(N == 1){
- 	    Nsum=values[0][key];
+       // a single observation: its sum is the value itself and its variance term is zero
+       NewSum=values[0][key];
- 	    Ns=0;
+       NewS=0;
+       T=1;
+       min = Math.min(values[0][key], min);
+       max = Math.max(values[0][key], max);
- 	}else if(N > 1){
+   }else if(N > 1){
- 	    // loop over the data pairwise
+       // loop over the data pairwise
- 	    for(var i = 0; i < half; i++){
+       for(var i = 0; i < half; i++){
- 		// check min max
+     // check min max
- 		if(values[2*i+1][key] < values[2*i][key] ){
+     if(values[2*i+1][key] < values[2*i][key] ){
- 		    min = Math.min(values[2*i+1][key], min);
+         min = Math.min(values[2*i+1][key], min);
- 		    max = Math.max(values[2*i][key], max);
+         max = Math.max(values[2*i][key], max);
- 		}else{
+     }else{
- 		    min = Math.min(values[2*i][key], min);
+         min = Math.min(values[2*i][key], min);
- 		    max = Math.max(values[2*i+1][key], max);
+         max = Math.max(values[2*i+1][key], max);
- 		}
+     }
- 		SumA[stack_ptr]=values[2*i+1][key] + values[2*i][key];
+     SumA[stack_ptr]=values[2*i+1][key] + values[2*i][key];
- 		var diff = values[2*i + 1][key] - values[2*i][key] ;
+     var diff = values[2*i + 1][key] - values[2*i][key] ;
- 		SA[stack_ptr]=( diff * diff ) / 2;
+     SA[stack_ptr]=( diff * diff ) / 2;
- 		Terms[stack_ptr]=2;
+     Terms[stack_ptr]=2;
- 		while( Terms[stack_ptr] == Terms[stack_ptr-1]){
+     while( Terms[stack_ptr] == Terms[stack_ptr-1]){
- 		    // combine the top two elements in storage, as
+         // combine the top two elements in storage, as
- 		    // they have equal numbers of support terms.  this
+         // they have equal numbers of support terms.  this
- 		    // should happen for powers of two (2, 4, 8, etc).
+         // should happen for powers of two (2, 4, 8, etc).
- 		    // Everything else gets cleaned up below
+         // Everything else gets cleaned up below
- 		    stack_ptr--;
+         stack_ptr--;
- 		    Terms[stack_ptr]*=2;
+         Terms[stack_ptr]*=2;
- 		    // compare this diff with the below diff.  Here
+         // compare this diff with the below diff.  Here
- 		    // there is no multiplication and division of the
+         // there is no multiplication and division of the
- 		    // first sum (SumA[stack_ptr]) because it is the
+         // first sum (SumA[stack_ptr]) because it is the
- 		    // same size as the other.
+         // same size as the other.
- 		    var diff = SumA[stack_ptr] - SumA[stack_ptr+1];
+         var diff = SumA[stack_ptr] - SumA[stack_ptr+1];
- 		    SA[stack_ptr]=  SA[stack_ptr] + SA[stack_ptr+1] +
+         SA[stack_ptr]=  SA[stack_ptr] + SA[stack_ptr+1] +
- 			(diff * diff)/Terms[stack_ptr];
+       (diff * diff)/Terms[stack_ptr];
- 		    SumA[stack_ptr] += SumA[stack_ptr+1];
+         SumA[stack_ptr] += SumA[stack_ptr+1];
- 		} // repeat as needed
+     } // repeat as needed
- 		stack_ptr++;
+     stack_ptr++;
- 	    }
+       }
- 	    stack_ptr--;
+       stack_ptr--;
- 	    // check if N is odd
+       // check if N is odd
- 	    if(N % 2 !=  0){
+       if(N % 2 !=  0){
- 		// handle that dangling entry
+     // handle that dangling entry
- 		stack_ptr++;
+     stack_ptr++;
- 		Terms[stack_ptr]=1;
+     Terms[stack_ptr]=1;
- 		SumA[stack_ptr]=values[N-1][key];
+     SumA[stack_ptr]=values[N-1][key];
- 		SA[stack_ptr]=0;  // the variance of a single observation is zero!
+     SA[stack_ptr]=0;  // the variance of a single observation is zero!
- 		min = Math.min(values[N-1][key], min);
+     min = Math.min(values[N-1][key], min);
- 		max = Math.max(values[N-1][key], max);
+     max = Math.max(values[N-1][key], max);
- 	    }
+       }
- 	    T=Terms[stack_ptr];
+       T=Terms[stack_ptr];
- 	    NewSum=SumA[stack_ptr];
+       NewSum=SumA[stack_ptr];
- 	    NewS= SA[stack_ptr];
+       NewS= SA[stack_ptr];
- 	    if(stack_ptr > 1){
+       if(stack_ptr > 1){
- 		// values.length is not power of two, so not
+     // values.length is not power of two, so not
- 		// everything has been scooped up in the inner loop
+     // everything has been scooped up in the inner loop
- 		// above.  Here handle the remainders
+     // above.  Here handle the remainders
- 		for(var i = stack_ptr-1; i>=1 ; i--){
+     for(var i = stack_ptr-1; i>=1 ; i--){
- 		    // compare this diff with the above diff---one
+         // compare this diff with the above diff---one
- 		    // more multiply and divide on the current sum,
+         // more multiply and divide on the current sum,
- 		    // because the size of the sets (SumA[i] and NewSum)
+         // because the size of the sets (SumA[i] and NewSum)
- 		    // are different.
+         // are different.
- 		    var diff = Terms[i]*NewSum/T-SumA[i]; 
+         var diff = Terms[i]*NewSum/T-SumA[i]; 
- 		    NewS = NewS + SA[i] + 
+         NewS = NewS + SA[i] + 
- 			( T * diff * diff )/
+       ( T * diff * diff )/
- 			(Terms[i] * (Terms[i] + T));
+       (Terms[i] * (Terms[i] + T));
- 		    NewSum += SumA[i];
+         NewSum += SumA[i];
- 		    T += Terms[i];
+         T += Terms[i];
- 		}
- 	    }
+     }
- 	}
+       }
+   }
- 	// finally, combine NewS and NewSum with S and Sum
+   // finally, combine NewS and NewSum with S and Sum
- 	return 	combine_S(
+   return  combine_S(
- 	    {'S':NewS,'Sum':NewSum, 'M': T ,  'min':min, 'max':max},
+       {'S':NewS,'Sum':NewSum, 'M': T ,  'min':min, 'max':max},
- 	    {'S':S,'Sum':Sum, 'M': M ,  'min':min, 'max':max});
+       {'S':S,'Sum':Sum, 'M': M ,  'min':min, 'max':max});
      }
  
  
@@ -567, +581 @@

  
      */
      function KnuthianOnLineVariance(values, M2, n, mean, min, max,  key){
- 	if(!M2){ M2 = 0; }
+   if(!M2){ M2 = 0; }
- 	if(!n){ n = 0; }
+   if(!n){ n = 0; }
- 	if(!mean){ mean  = 0; }
+   if(!mean){ mean  = 0; }
- 	if(!min){ min = Infinity; }
+   if(!min){ min = Infinity; }
- 	if(!max){ max = -Infinity; }
+   if(!max){ max = -Infinity; }
- 	if(!key){ key = 'risk'; }
+   if(!key){ key = 'risk'; }
  
- 	// this algorithm is apparently a special case of the above
+   // this algorithm is apparently a special case of the above
- 	// pairwise algorithm, in which you just apply one more value
+   // pairwise algorithm, in which you just apply one more value
- 	// to the running total.  I don't know why bun Chan et al
+   // to the running total.  I don't know why, but Chan et al.
- 	// (1979) and again in their later paper claim that using M
+   // (1979) and again in their later paper claim that using M
- 	// greater than 1 is always better than not.
+   // greater than 1 is always better than not.
  
- 	// but this code is certainly cleaner!  code based on Scott
+   // but this code is certainly cleaner!  code based on Scott
- 	// Lamb's Java found at
+   // Lamb's Java found at
- 	// http://www.slamb.org/svn/repos/trunk/projects/common/src/java/org/slamb/common/stats/Sample.java
+   // http://www.slamb.org/svn/repos/trunk/projects/common/src/java/org/slamb/common/stats/Sample.java
- 	// but modified a bit
+   // but modified a bit
  
- 	for(var i=0; i<values.length; i++ ){
+   for(var i=0; i<values.length; i++ ){
- 	    var diff = (values[i][key] - mean);
+       var diff = (values[i][key] - mean);
              var newmean = mean +  diff / (n+i+1);
              M2 += diff * (values[i][key] - newmean);
              mean = newmean;
              min = Math.min(values[i][key], min);
              max = Math.max(values[i][key], max);
          }
- 	return {'M2': M2, 'n': n + values.length, 'mean': mean, 'min':min, 'max':max };
+   return {'M2': M2, 'n': n + values.length, 'mean': mean, 'min':min, 'max':max };
      }
  
      function KnuthCombine(partitionA,partitionB){
- 	if(partitionB.n){
+   if(partitionB.n){
- 	    var newn = partitionA.n + partitionB.n;
+       var newn = partitionA.n + partitionB.n;
              var diff = partitionB.mean - partitionA.mean;
              var newmean = partitionA.mean + diff*(partitionB.n/newn)
            var M2 = partitionA.M2 + partitionB.M2 + (diff * diff * partitionA.n * partitionB.n / newn );
              min = Math.min(partitionB.min, partitionA.min);
              max = Math.max(partitionB.max, partitionA.max);
- 	    return {'M2': M2, 'n': newn, 'mean': newmean, 'min':min, 'max':max };
+       return {'M2': M2, 'n': newn, 'mean': newmean, 'min':min, 'max':max };
          } else {
              return partitionA;
          }
@@ -622, +636 @@

  
      if(!rereduce)
      {
- 	output = pairwise_update(values);
+   output = pairwise_update(values);
- 	output.variance_n=output.S/output.M;
+   output.variance_n=output.S/output.M;
- 	output.mean = output.Sum/output.M;
+   output.mean = output.Sum/output.M;
- 	knuthOutput = KnuthianOnLineVariance(values);
+   knuthOutput = KnuthianOnLineVariance(values);
- 	knuthOutput.variance_n=knuthOutput.M2/knuthOutput.n;
+   knuthOutput.variance_n=knuthOutput.M2/knuthOutput.n;
- 	output.knuthOutput=knuthOutput;
+   output.knuthOutput=knuthOutput;
  
      } else {
- 	/*
+   /*
             we have an existing pass, so should have multiple outputs to combine  
          */
- 	for(var v in values){
+   for(var v in values){
- 	    output = combine_S(values[v],output);
+       output = combine_S(values[v],output);
- 	    knuthOutput = KnuthCombine(values[v].knuthOutput, knuthOutput);
+       knuthOutput = KnuthCombine(values[v].knuthOutput, knuthOutput);
- 	}
+   }
- 	output.variance_n=output.S/output.M;
+   output.variance_n=output.S/output.M;
- 	output.mean = output.Sum/output.M;
+   output.mean = output.Sum/output.M;
- 	knuthOutput.variance_n=knuthOutput.M2/knuthOutput.n;
+   knuthOutput.variance_n=knuthOutput.M2/knuthOutput.n;
- 	output.knuthOutput=knuthOutput;
+   output.knuthOutput=knuthOutput;
      }
      // and done
      return output;
@@ -661, +675 @@

  == Retrieving documents without a certain field ==
  Sometimes you might need to get a list of documents that '''don't''' have a certain field. You can do this quite easily by emitting keys that fit the "undefined" condition:
  
+ {{{#!highlight javascript
- {{{
- map
- function(doc)
+ function(doc) {
- {
-   if (doc.field === void 0)
+   if (doc.field === void 0) {
-   {
      emit(doc._id, null);
    }
  }
@@ -674, +685 @@

  
  However, if you have more than just a few fields that need to be tested for absence, you can use another approach instead of creating a view for each negation:
  
- {{{
+ {{{#!highlight javascript
- function (doc)
+ function (doc) {
- {
   // List of fields to test for absence in documents; fields specified here will be emitted as the key
    var fields = new Array("type", "role", "etc");
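+ 
+   // A possible continuation (sketch): emit the field name as the key for
+   // every listed field the document is missing.
+   for (var i = 0; i < fields.length; i++) {
+     if (doc[fields[i]] === void 0) {
+       emit(fields[i], null);
+     }
+   }
+ }
+ }}}
+ 
+ Querying the view with, for instance, ''key="role"'' then lists the documents that lack the ''role'' field.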
  
