couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [18/30] bear commit: updated refs/heads/import-master to 5f99806
Date Thu, 31 Jul 2014 21:10:09 GMT
bear:statistics_subset/2


Project: http://git-wip-us.apache.org/repos/asf/couchdb-bear/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-bear/commit/9a615049
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-bear/tree/9a615049
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-bear/diff/9a615049

Branch: refs/heads/import-master
Commit: 9a6150495941128d026757486d21602c45b125e0
Parents: b9feed8
Author: Ulf Wiger <ulf@feuerlabs.com>
Authored: Thu Sep 12 15:45:50 2013 +0200
Committer: Ulf Wiger <ulf@feuerlabs.com>
Committed: Mon Nov 4 20:49:47 2013 +0100

----------------------------------------------------------------------
 src/bear.erl | 140 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 113 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-bear/blob/9a615049/src/bear.erl
----------------------------------------------------------------------
diff --git a/src/bear.erl b/src/bear.erl
index 04593e7..4f5baba 100644
--- a/src/bear.erl
+++ b/src/bear.erl
@@ -41,32 +41,7 @@
 
 -compile([native]).
 
-get_statistics(Values) when length(Values) < ?STATS_MIN ->
-    [
-     {min, 0.0},
-     {max, 0.0},
-     {arithmetic_mean, 0.0},
-     {geometric_mean, 0.0},
-     {harmonic_mean, 0.0},
-     {median, 0.0},
-     {variance, 0.0},
-     {standard_deviation, 0.0},
-     {skewness, 0.0},
-     {kurtosis, 0.0},
-     {percentile,
-      [
-       {50, 0.0},
-       {75, 0.0},
-       {90, 0.0},
-       {95, 0.0},
-       {99, 0.0},
-       {999, 0.0}
-      ]
-     },
-     {histogram, [{0, 0}]},
-     {n, 0}
-     ];
-get_statistics(Values) ->
+get_statistics([_,_,_,_,_|_] = Values) ->
     Scan_res = scan_values(Values),
     Scan_res2 = scan_values2(Values, Scan_res),
     Variance = variance(Scan_res, Scan_res2),
@@ -94,7 +69,86 @@ get_statistics(Values) ->
      },
      {histogram, get_histogram(Values, Scan_res, Scan_res2)},
      {n, Scan_res#scan_result.n}
-     ].
+    ];
+get_statistics(Values) when is_list(Values) ->
+    [
+     {min, 0.0},
+     {max, 0.0},
+     {arithmetic_mean, 0.0},
+     {geometric_mean, 0.0},
+     {harmonic_mean, 0.0},
+     {median, 0.0},
+     {variance, 0.0},
+     {standard_deviation, 0.0},
+     {skewness, 0.0},
+     {kurtosis, 0.0},
+     {percentile,
+      [
+       {50, 0.0},
+       {75, 0.0},
+       {90, 0.0},
+       {95, 0.0},
+       {99, 0.0},
+       {999, 0.0}
+      ]
+     },
+     {histogram, [{0, 0}]},
+     {n, 0}
+    ].
+
+%% @doc Compute only the statistics listed in Items (atoms, or {percentile, Ps}).
+%% Fewer than ?STATS_MIN values yields the zeroed defaults of get_statistics([]);
+%% the percentile entry is included when Items holds a {percentile, _} request.
+get_statistics_subset(Values, Items) ->
+    Length = length(Values),
+    if Length < ?STATS_MIN ->
+            WantsPerc = lists:keymember(percentile, 1, Items),
+            [I || {K,_} = I <- get_statistics([]),
+                  lists:member(K, Items) orelse (WantsPerc andalso K =:= percentile)];
+       true ->
+            SortedValues = lists:sort(Values),
+            Steps = calc_steps(Items),
+            %% Only run the scans that the requested items actually need.
+            Scan_res = if Steps > 1 -> scan_values(Values); true -> [] end,
+            Scan_res2 = if Steps > 2 -> scan_values2(Values, Scan_res); true -> [] end,
+            report_subset(Items, Length, SortedValues, Scan_res, Scan_res2)
+    end.
+
+%% Highest level/1 among the requested items, never below 1.
+%% A two-tuple item such as {percentile, Ps} is ranked by its first element.
+calc_steps(Items) ->
+    Tag = fun({T, _}) -> T; (T) -> T end,
+    Levels = lists:map(fun(I) -> level(Tag(I)) end, Items),
+    lists:max([1 | Levels]).
+
+%% Scan depth an item needs (see calc_steps/1): spread, shape and histogram
+%% items need 3, the three means need 2, anything else needs 1.
+level(Item) when Item =:= standard_deviation; Item =:= variance;
+                 Item =:= skewness; Item =:= kurtosis; Item =:= histogram ->
+    3;
+level(Item) when Item =:= arithmetic_mean; Item =:= geometric_mean;
+                 Item =:= harmonic_mean ->
+    2;
+level(_Other) -> 1.
+
+%% Build the result proplist: one {Item, Value} entry per element of Items,
+%% in Items order. An item without a matching clause raises function_clause.
+report_subset(Items, N, SortedValues, Scan_res, Scan_res2) ->
+    lists:map(fun(I) -> subset_item(I, N, SortedValues, Scan_res, Scan_res2) end, Items).
+
+subset_item(min, _N, Sorted, _SR, _SR2) -> {min, hd(Sorted)};
+subset_item(max, _N, Sorted, _SR, _SR2) -> {max, lists:last(Sorted)};
+subset_item(arithmetic_mean = I, _N, _Sorted, SR, _SR2) -> {I, arithmetic_mean(SR)};
+subset_item(harmonic_mean = I, _N, _Sorted, SR, _SR2) -> {I, harmonic_mean(SR)};
+subset_item(geometric_mean = I, _N, _Sorted, SR, _SR2) -> {I, geometric_mean(SR)};
+subset_item(median, N, Sorted, _SR, _SR2) -> {median, percentile(Sorted, #scan_result{n = N}, 0.5)};
+subset_item(variance, _N, _Sorted, SR, SR2) -> {variance, variance(SR, SR2)};
+subset_item(standard_deviation = I, _N, _Sorted, SR, SR2) -> {I, std_deviation(SR, SR2)};
+subset_item(skewness, _N, _Sorted, SR, SR2) -> {skewness, skewness(SR, SR2)};
+subset_item(kurtosis, _N, _Sorted, SR, SR2) -> {kurtosis, kurtosis(SR, SR2)};
+subset_item({percentile, Ps}, N, Sorted, _SR, _SR2) -> {percentile, percentiles(Ps, N, Sorted)};
+subset_item(histogram, _N, Sorted, SR, SR2) -> {histogram, get_histogram(Sorted, SR, SR2)};
+subset_item(n, N, _Sorted, _SR, _SR2) -> {n, N}.
 
 get_statistics(Values, _) when length(Values) < ?STATS_MIN ->
     0.0;
@@ -446,3 +500,35 @@ tied_rank_worker([Item|Remainder], Work, PrevValue) ->
             end
     end.
 
+
+%% Value at each requested percentile, in request order; Ps need not be sorted
+%% because every P gets its own walk over the sorted Values.
+percentiles(Ps, N, Values) -> [hd(pick_items(Values, 1, [{P, perc(P, N)}])) || P <- Ps].
+
+%% Walk Values with a 1-based position counter, emitting {Tag, Value} whenever
+%% the head of the wanted list carries the current position; wanted entries
+%% still pending when Values runs out are reported as {Tag, undefined}.
+pick_items([], _Pos, Wanted) -> [{Tag, undefined} || {Tag, _Rank} <- Wanted];
+pick_items([V | _] = Vs, Pos, [{Tag, Pos} | Wanted]) -> [{Tag, V} | pick_items(Vs, Pos, Wanted)];
+pick_items([_ | Vs], Pos, Wanted) -> pick_items(Vs, Pos + 1, Wanted).
+
+%% 1-based rank of percentile P among Len sorted values, never below 1.
+%% Integers 0..100 are percent, integers above 100 up to 1000 are per mille
+%% (999 means 99.9%), and floats in 0..1 are fractions.
+perc(P, Len) when is_integer(P), P >= 0, P =< 100 ->
+    erlang:max(1, round(P * Len / 100));
+perc(P, Len) when is_integer(P), P >= 100, P =< 1000 ->
+    erlang:max(1, round(P * Len / 1000));
+perc(P, Len) when is_float(P), P >= 0, P =< 1 -> erlang:max(1, round(P * Len)).
+
+
+%% Fixed sample data set: the integers 1..9 in ascending order, each repeated
+%% a set number of times. Every {Value, Count} pair below expands to Count
+%% consecutive copies of Value, 126 elements in total.
+test_values() ->
+    Runs = [{1, 7}, {2, 7},
+            {3, 14}, {4, 14}, {5, 14},
+            {6, 21}, {7, 21}, {8, 21},
+            {9, 7}],
+    Expand = fun({Value, Count}) -> lists:duplicate(Count, Value) end,
+    lists:append(lists:map(Expand, Runs)).


Mime
View raw message