couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tonysu...@apache.org
Subject [1/2] couchdb-mango git commit: Provide an ability to disable the indexing of array lengths.
Date Wed, 28 Oct 2015 17:11:30 GMT
Repository: couchdb-mango
Updated Branches:
  refs/heads/master a297e2e9e -> 090dc6750


Provide an ability to disable the indexing of array lengths.

Depending on the data shape, Cloudant Query can end up creating many
thousands of unique fields, which leads to JVM heap exhaustion
because Lucene tries to cache information about every field and is not
designed to handle many thousands of fields.
This change allows the user to disable the indexing of the array length
field, so that they don’t need to take the performance hit if they
don’t plan to use that field in their queries ($size operator).

The array length field is a single extra field per unique path to an array. We found this
problem with a client whose data used arbitrary values as keys, which exploded the
number of fields in Lucene. The obvious fix was to switch to indexing only what they wanted
to query on. Unfortunately, that didn't prevent the automatically generated array length fields
from being created. This patch is a big hammer that removes all auto-generated array length
fields; the option may be generally useful, though we're also planning another patch that
removes array length fields for anything that's not specified in the index's field list.

Add index_array_lengths to the list of valid fields in the index
document so that an index document containing this field passes
validation, and enforce that its value is a boolean.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mango/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mango/commit/bf44d0fe
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mango/tree/bf44d0fe
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mango/diff/bf44d0fe

Branch: refs/heads/master
Commit: bf44d0fe3869386e5fdcf99f6c690dc8498f213b
Parents: a297e2e
Author: brkolla <bkolla@cloudant.com>
Authored: Thu Oct 22 20:16:42 2015 -0400
Committer: brkolla <bkolla@cloudant.com>
Committed: Tue Oct 27 14:18:14 2015 -0400

----------------------------------------------------------------------
 src/mango_idx_text.erl    |  6 ++++++
 src/mango_native_proc.erl | 27 +++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/bf44d0fe/src/mango_idx_text.erl
----------------------------------------------------------------------
diff --git a/src/mango_idx_text.erl b/src/mango_idx_text.erl
index fcd2939..9ade6e2 100644
--- a/src/mango_idx_text.erl
+++ b/src/mango_idx_text.erl
@@ -219,6 +219,12 @@ opts() ->
             {optional, true},
             {default, []},
             {validator, fun ?MODULE:validate_fields/1}
+        ]},
+        {<<"index_array_lengths">>, [
+            {tag, index_array_lengths},
+            {optional, true},
+            {default, true},
+            {validator, fun mango_opts:is_boolean/1}
         ]}
     ].
 

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/bf44d0fe/src/mango_native_proc.erl
----------------------------------------------------------------------
diff --git a/src/mango_native_proc.erl b/src/mango_native_proc.erl
index 822d173..6d0fb24 100644
--- a/src/mango_native_proc.erl
+++ b/src/mango_native_proc.erl
@@ -40,6 +40,7 @@
 
 
 -record(tacc, {
+    index_array_lengths = true,
     fields = all_fields,
     path = []
 }).
@@ -164,8 +165,12 @@ get_text_entries({IdxProps}, Doc) ->
 
 get_text_entries0(IdxProps, Doc) ->
     DefaultEnabled = get_default_enabled(IdxProps),
+    IndexArrayLengths = get_index_array_lengths(IdxProps),
     FieldsList = get_text_field_list(IdxProps),
-    TAcc = #tacc{fields = FieldsList},
+    TAcc = #tacc{
+        index_array_lengths = IndexArrayLengths,
+        fields = FieldsList
+    },
     Fields0 = get_text_field_values(Doc, TAcc),
     Fields = if not DefaultEnabled -> Fields0; true ->
         add_default_text_field(Fields0)
@@ -179,13 +184,19 @@ get_text_field_values({Props}, TAcc) when is_list(Props) ->
     get_text_field_values_obj(Props, TAcc, []);
 
 get_text_field_values(Values, TAcc) when is_list(Values) ->
+    IndexArrayLengths = TAcc#tacc.index_array_lengths,
     NewPath = ["[]" | TAcc#tacc.path],
     NewTAcc = TAcc#tacc{path = NewPath},
-    % We bypass make_text_field and directly call make_text_field_name
-    % because the length field name is not part of the path.
-    LengthFieldName = make_text_field_name(NewTAcc#tacc.path, <<"length">>),
-    LengthField = [{LengthFieldName, <<"length">>, length(Values)}],
-    get_text_field_values_arr(Values, NewTAcc, LengthField);
+    case IndexArrayLengths of 
+        true ->
+            % We bypass make_text_field and directly call make_text_field_name
+            % because the length field name is not part of the path.
+            LengthFieldName = make_text_field_name(NewTAcc#tacc.path, <<"length">>),
+            LengthField = [{LengthFieldName, <<"length">>, length(Values)}],
+            get_text_field_values_arr(Values, NewTAcc, LengthField);
+        _ ->
+            get_text_field_values_arr(Values, NewTAcc, [])
+    end;
 
 get_text_field_values(Bin, TAcc) when is_binary(Bin) ->
     make_text_field(TAcc, <<"string">>, Bin);
@@ -227,6 +238,10 @@ get_default_enabled(Props) ->
     end.
 
 
+get_index_array_lengths(Props) ->
+    couch_util:get_value(<<"index_array_lengths">>, Props, true).
+
+
 add_default_text_field(Fields) ->
     DefaultFields = add_default_text_field(Fields, []),
     DefaultFields ++ Fields.


Mime
View raw message