Return-Path: X-Original-To: apmail-couchdb-commits-archive@www.apache.org Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 134451775E for ; Tue, 3 Feb 2015 15:13:38 +0000 (UTC) Received: (qmail 91346 invoked by uid 500); 3 Feb 2015 15:13:10 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 91202 invoked by uid 500); 3 Feb 2015 15:13:10 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 89320 invoked by uid 99); 3 Feb 2015 15:13:09 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Feb 2015 15:13:09 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 38CB5E082F; Tue, 3 Feb 2015 15:13:09 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: robertkowalski@apache.org To: commits@couchdb.apache.org Date: Tue, 03 Feb 2015 15:13:43 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [36/50] [abbrv] couchdb-mango git commit: Move view specific logic to mango_view_idx Move view specific logic to mango_view_idx A lot of the logic in mango_selector is view specific so this moves it to mango_idx_view to make that more apparent. BugzId: 33294 Project: http://git-wip-us.apache.org/repos/asf/couchdb-mango/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mango/commit/7a9de344 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mango/tree/7a9de344 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mango/diff/7a9de344 Branch: refs/heads/master Commit: 7a9de3447652d31bb517a6975b6cb5c622854681 Parents: 08c3320 Author: Paul J. Davis Authored: Fri Jan 9 12:03:29 2015 -0600 Committer: Paul J. Davis Committed: Fri Jan 16 13:32:49 2015 -0600 ---------------------------------------------------------------------- src/mango_cursor.erl | 19 +--- src/mango_idx_view.erl | 272 +++++++++++++++++++++++++++++++++++++++++++- src/mango_selector.erl | 245 --------------------------------------- 3 files changed, 273 insertions(+), 263 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_cursor.erl ---------------------------------------------------------------------- diff --git a/src/mango_cursor.erl b/src/mango_cursor.erl index 22970c7..8f780b7 100644 --- a/src/mango_cursor.erl +++ b/src/mango_cursor.erl @@ -29,8 +29,8 @@ create(Db, Selector0, Opts) -> Selector = mango_selector:normalize(Selector0), - IndexFields = mango_selector:index_fields(Selector), - FieldRanges = find_field_ranges(Selector, IndexFields), + IndexFields = mango_idx_view:indexable_fields(Selector), + FieldRanges = mango_idx_view:field_ranges(Selector, IndexFields), if IndexFields /= [] -> ok; true -> ?MANGO_ERROR({no_usable_index, operator_unsupported}) @@ -123,21 +123,6 @@ limit_to_sort(ExistingIndexes, UsableIndexes, Sort) -> FinalIndexes. -% For each field, return {Field, Range} -find_field_ranges(Selector, Fields) -> - find_field_ranges(Selector, Fields, []). - -find_field_ranges(_Selector, [], Acc) -> - lists:reverse(Acc); -find_field_ranges(Selector, [Field | Rest], Acc) -> - case mango_selector:range(Selector, Field) of - empty -> - [{Field, empty}]; - Range -> - find_field_ranges(Selector, Rest, [{Field, Range} | Acc]) - end. - - % Any of these indexes may be a composite index. For each % index find the most specific set of fields for each % index. Ie, if an index has columns a, b, c, d, then http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_idx_view.erl ---------------------------------------------------------------------- diff --git a/src/mango_idx_view.erl b/src/mango_idx_view.erl index 993574f..eaa4341 100644 --- a/src/mango_idx_view.erl +++ b/src/mango_idx_view.erl @@ -21,7 +21,11 @@ to_json/1, columns/1, start_key/1, - end_key/1 + end_key/1, + + indexable_fields/1, + field_ranges/1, + field_ranges/2 ]). @@ -171,3 +175,269 @@ make_view(Idx) -> {<<"options">>, {Idx#idx.opts}} ]}, {Idx#idx.name, View}. + + +% This function returns a list of indexes that +% can be used to restrict this query. This works by +% searching the selector looking for field names that +% can be "seen". +% +% Operators that can be seen through are '$and' and any of +% the logical comparisons ('$lt', '$eq', etc). Things like +% '$regex', '$in', '$nin', and '$or' can't be serviced by +% a single index scan so we disallow them. In the future +% we may become more clever and increase our ken such that +% we will be able to see through these with crafty indexes +% or new uses for existing indexes. For instance, I could +% see an '$or' between comparisons on the same field becoming +% the equivalent of a multi-query. But that's for another +% day. + +% We can see through '$and' trivially +indexable_fields({[{<<"$and">>, Args}]}) -> + lists:usort(lists:flatten([indexable_fields(A) || A <- Args])); + +% So far we can't see through any other operator +indexable_fields({[{<<"$", _/binary>>, _}]}) -> + []; + +% If we have a field with a terminator that is locatable +% using an index then the field is a possible index +indexable_fields({[{Field, Cond}]}) -> + case indexable(Cond) of + true -> + [Field]; + false -> + [] + end; + +% An empty selector +indexable_fields({[]}) -> + []. + + +% Check if a condition is indexable. The logical +% comparisons are mostly straight forward. We +% currently don't understand '$in' which is +% theoretically supportable. '$nin' and '$ne' +% aren't currently supported because they require +% multiple index scans. +indexable({[{<<"$lt">>, _}]}) -> + true; +indexable({[{<<"$lte">>, _}]}) -> + true; +indexable({[{<<"$eq">>, _}]}) -> + true; +indexable({[{<<"$gt">>, _}]}) -> + true; +indexable({[{<<"$gte">>, _}]}) -> + true; + +% All other operators are currently not indexable. +% This is also a subtle assertion that we don't +% call indexable/1 on a field name. +indexable({[{<<"$", _/binary>>, _}]}) -> + false. + + +% For each field, return {Field, Range} +field_ranges(Selector) -> + Fields = indexable_fields(Selector), + field_ranges(Selector, Fields). + + +field_ranges(Selector, Fields) -> + field_ranges(Selector, Fields, []). + + +field_ranges(_Selector, [], Acc) -> + lists:reverse(Acc); +field_ranges(Selector, [Field | Rest], Acc) -> + case range(Selector, Field) of + empty -> + [{Field, empty}]; + Range -> + field_ranges(Selector, Rest, [{Field, Range} | Acc]) + end. + + +% Find the complete range for a given index in this +% selector. This works by AND'ing logical comparisons +% together so that we can define the start and end +% keys for a given index. +% +% Selector must have been normalized before calling +% this function. +range(Selector, Index) -> + range(Selector, Index, '$gt', mango_json:min(), '$lt', mango_json:max()). + + +% Adjust Low and High based on values found for the +% givend Index in Selector. +range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) -> + lists:foldl(fun + (Arg, {LC, L, HC, H}) -> + range(Arg, Index, LC, L, HC, H); + (_Arg, empty) -> + empty + end, {LCmp, Low, HCmp, High}, Args); + +% We can currently only traverse '$and' operators +range({[{<<"$", _/binary>>}]}, _Index, LCmp, Low, HCmp, High) -> + {LCmp, Low, HCmp, High}; + +% If the field name matches the index see if we can narrow +% the acceptable range. +range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) -> + range(Cond, LCmp, Low, HCmp, High); + +% Else we have a field unrelated to this index so just +% return the current values. +range(_, _, LCmp, Low, HCmp, High) -> + {LCmp, Low, HCmp, High}. + + +% The comments below are a bit cryptic at first but they show +% where the Arg cand land in the current range. +% +% For instance, given: +% +% {$lt: N} +% Low = 1 +% High = 5 +% +% Depending on the value of N we can have one of five locations +% in regards to a given Low/High pair: +% +% min low mid high max +% +% That is: +% min = (N < Low) +% low = (N == Low) +% mid = (Low < N < High) +% high = (N == High) +% max = (High < N) +% +% If N < 1, (min) then the effective range is empty. +% +% If N == 1, (low) then we have to set the range to empty because +% N < 1 && N >= 1 is an empty set. If the operator had been '$lte' +% and LCmp was '$gte' or '$eq' then we could keep around the equality +% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg. +% +% If 1 < N < 5 (mid), then we set High to Arg and Arg has just +% narrowed our range. HCmp is set the the '$lt' operator that was +% part of the input. +% +% If N == 5 (high), We just set HCmp to '$lt' since its guaranteed +% to be equally or more restrictive than the current possible values +% of '$lt' or '$lte'. +% +% If N > 5 (max), nothing changes as our current range is already +% more narrow than the current condition. +% +% Obviously all of that logic gets tweaked for the other logical +% operators but its all straight forward once you figure out how +% we're basically just narrowing our logical ranges. + +range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) -> + case range_pos(Low, Arg, High) of + min -> + empty; + low -> + empty; + mid -> + {LCmp, Low, '$lt', Arg}; + high -> + {LCmp, Low, '$lt', Arg}; + max -> + {LCmp, Low, HCmp, High} + end; + +range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) -> + case range_pos(Low, Arg, High) of + min -> + empty; + low when LCmp == '$gte'; LCmp == '$eq' -> + {'$eq', Arg, '$eq', Arg}; + low -> + empty; + mid -> + {LCmp, Low, '$lte', Arg}; + high -> + {LCmp, Low, HCmp, High}; + max -> + {LCmp, Low, HCmp, High} + end; + +range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) -> + case range_pos(Low, Arg, High) of + min -> + empty; + low when LCmp == '$gte'; LCmp == '$eq' -> + {'$eq', Arg, '$eq', Arg}; + low -> + empty; + mid -> + {'$eq', Arg, '$eq', Arg}; + high when HCmp == '$lte'; HCmp == '$eq' -> + {'$eq', Arg, '$eq', Arg}; + high -> + empty; + max -> + empty + end; + +range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) -> + case range_pos(Low, Arg, High) of + min -> + {LCmp, Low, HCmp, High}; + low -> + {LCmp, Low, HCmp, High}; + mid -> + {'$gte', Arg, HCmp, High}; + high when HCmp == '$lte'; HCmp == '$eq' -> + {'$eq', Arg, '$eq', Arg}; + high -> + empty; + max -> + empty + end; + +range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) -> + case range_pos(Low, Arg, High) of + min -> + {LCmp, Low, HCmp, High}; + low -> + {'$gt', Arg, HCmp, High}; + mid -> + {'$gt', Arg, HCmp, High}; + high -> + empty; + max -> + empty + end; + +% There's some other un-indexable restriction on the index +% that will be applied as a post-filter. Ignore it and +% carry on our merry way. +range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) -> + {LCmp, Low, HCmp, High}. + + +% Returns the value min | low | mid | high | max depending +% on how Arg compares to Low and High. +range_pos(Low, Arg, High) -> + case mango_json:cmp(Arg, Low) of + N when N < 0 -> min; + N when N == 0 -> low; + _ -> + case mango_json:cmp(Arg, High) of + X when X < 0 -> + mid; + X when X == 0 -> + high; + _ -> + max + end + end. http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/7a9de344/src/mango_selector.erl ---------------------------------------------------------------------- diff --git a/src/mango_selector.erl b/src/mango_selector.erl index d1c9898..dd16bf5 100644 --- a/src/mango_selector.erl +++ b/src/mango_selector.erl @@ -15,8 +15,6 @@ -export([ normalize/1, - index_fields/1, - range/2, match/2 ]). @@ -48,54 +46,6 @@ normalize(Selector) -> end, {NProps}. -% This function returns a list of indexes that -% can be used to restrict this query. This works by -% searching the selector looking for field names that -% can be "seen". -% -% Operators that can be seen through are '$and' and any of -% the logical comparisons ('$lt', '$eq', etc). Things like -% '$regex', '$in', '$nin', and '$or' can't be serviced by -% a single index scan so we disallow them. In the future -% we may become more clever and increase our ken such that -% we will be able to see through these with crafty indexes -% or new uses for existing indexes. For instance, I could -% see an '$or' between comparisons on the same field becoming -% the equivalent of a multi-query. But that's for another -% day. - -% We can see through '$and' trivially -index_fields({[{<<"$and">>, Args}]}) -> - lists:usort(lists:flatten([index_fields(A) || A <- Args])); - -% So far we can't see through any other operator -index_fields({[{<<"$", _/binary>>, _}]}) -> - []; - -% If we have a field with a terminator that is locatable -% using an index then the field is a possible index -index_fields({[{Field, Cond}]}) -> - case indexable(Cond) of - true -> - [Field]; - false -> - [] - end; - -% An empty selector -index_fields({[]}) -> - []. - -% Find the complete range for a given index in this -% selector. This works by AND'ing logical comparisons -% together so that we can define the start and end -% keys for a given index. -% -% Selector must have been normalized before calling -% this function. -range(Selector, Index) -> - range(Selector, Index, '$gt', mango_json:min(), '$lt', mango_json:max()). - % Match a selector against a #doc{} or EJSON value. % This assumes that the Selector has been normalized. @@ -407,201 +357,6 @@ negate({[{Field, Cond}]}) -> {[{Field, negate(Cond)}]}. -% Check if a condition is indexable. The logical -% comparisons are mostly straight forward. We -% currently don't understand '$in' which is -% theoretically supportable. '$nin' and '$ne' -% aren't currently supported because they require -% multiple index scans. -indexable({[{<<"$lt">>, _}]}) -> - true; -indexable({[{<<"$lte">>, _}]}) -> - true; -indexable({[{<<"$eq">>, _}]}) -> - true; -indexable({[{<<"$gt">>, _}]}) -> - true; -indexable({[{<<"$gte">>, _}]}) -> - true; - -% All other operators are currently not indexable. -% This is also a subtle assertion that we don't -% call indexable/1 on a field name. -indexable({[{<<"$", _/binary>>, _}]}) -> - false. - - -% Adjust Low and High based on values found for the -% givend Index in Selector. -range({[{<<"$and">>, Args}]}, Index, LCmp, Low, HCmp, High) -> - lists:foldl(fun - (Arg, {LC, L, HC, H}) -> - range(Arg, Index, LC, L, HC, H); - (_Arg, empty) -> - empty - end, {LCmp, Low, HCmp, High}, Args); - -% We can currently only traverse '$and' operators -range({[{<<"$", _/binary>>}]}, _Index, LCmp, Low, HCmp, High) -> - {LCmp, Low, HCmp, High}; - -% If the field name matches the index see if we can narrow -% the acceptable range. -range({[{Index, Cond}]}, Index, LCmp, Low, HCmp, High) -> - range(Cond, LCmp, Low, HCmp, High); - -% Else we have a field unrelated to this index so just -% return the current values. -range(_, _, LCmp, Low, HCmp, High) -> - {LCmp, Low, HCmp, High}. - - -% The comments below are a bit cryptic at first but they show -% where the Arg cand land in the current range. -% -% For instance, given: -% -% {$lt: N} -% Low = 1 -% High = 5 -% -% Depending on the value of N we can have one of five locations -% in regards to a given Low/High pair: -% -% min low mid high max -% -% That is: -% min = (N < Low) -% low = (N == Low) -% mid = (Low < N < High) -% high = (N == High) -% max = (High < N) -% -% If N < 1, (min) then the effective range is empty. -% -% If N == 1, (low) then we have to set the range to empty because -% N < 1 && N >= 1 is an empty set. If the operator had been '$lte' -% and LCmp was '$gte' or '$eq' then we could keep around the equality -% check on Arg by setting LCmp == HCmp = '$eq' and Low == High == Arg. -% -% If 1 < N < 5 (mid), then we set High to Arg and Arg has just -% narrowed our range. HCmp is set the the '$lt' operator that was -% part of the input. -% -% If N == 5 (high), We just set HCmp to '$lt' since its guaranteed -% to be equally or more restrictive than the current possible values -% of '$lt' or '$lte'. -% -% If N > 5 (max), nothing changes as our current range is already -% more narrow than the current condition. -% -% Obviously all of that logic gets tweaked for the other logical -% operators but its all straight forward once you figure out how -% we're basically just narrowing our logical ranges. - -range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) -> - case range_pos(Low, Arg, High) of - min -> - empty; - low -> - empty; - mid -> - {LCmp, Low, '$lt', Arg}; - high -> - {LCmp, Low, '$lt', Arg}; - max -> - {LCmp, Low, HCmp, High} - end; - -range({[{<<"$lte">>, Arg}]}, LCmp, Low, HCmp, High) -> - case range_pos(Low, Arg, High) of - min -> - empty; - low when LCmp == '$gte'; LCmp == '$eq' -> - {'$eq', Arg, '$eq', Arg}; - low -> - empty; - mid -> - {LCmp, Low, '$lte', Arg}; - high -> - {LCmp, Low, HCmp, High}; - max -> - {LCmp, Low, HCmp, High} - end; - -range({[{<<"$eq">>, Arg}]}, LCmp, Low, HCmp, High) -> - case range_pos(Low, Arg, High) of - min -> - empty; - low when LCmp == '$gte'; LCmp == '$eq' -> - {'$eq', Arg, '$eq', Arg}; - low -> - empty; - mid -> - {'$eq', Arg, '$eq', Arg}; - high when HCmp == '$lte'; HCmp == '$eq' -> - {'$eq', Arg, '$eq', Arg}; - high -> - empty; - max -> - empty - end; - -range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High) -> - case range_pos(Low, Arg, High) of - min -> - {LCmp, Low, HCmp, High}; - low -> - {LCmp, Low, HCmp, High}; - mid -> - {'$gte', Arg, HCmp, High}; - high when HCmp == '$lte'; HCmp == '$eq' -> - {'$eq', Arg, '$eq', Arg}; - high -> - empty; - max -> - empty - end; - -range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) -> - case range_pos(Low, Arg, High) of - min -> - {LCmp, Low, HCmp, High}; - low -> - {'$gt', Arg, HCmp, High}; - mid -> - {'$gt', Arg, HCmp, High}; - high -> - empty; - max -> - empty - end; - -% There's some other un-indexable restriction on the index -% that will be applied as a post-filter. Ignore it and -% carry on our merry way. -range({[{<<"$", _/binary>>, _}]}, LCmp, Low, HCmp, High) -> - {LCmp, Low, HCmp, High}. - - -% Returns the value min | low | mid | high | max depending -% on how Arg compares to Low and High. -range_pos(Low, Arg, High) -> - case mango_json:cmp(Arg, Low) of - N when N < 0 -> min; - N when N == 0 -> low; - _ -> - case mango_json:cmp(Arg, High) of - X when X < 0 -> - mid; - X when X == 0 -> - high; - _ -> - max - end - end. - - match({[{<<"$and">>, Args}]}, Value, Cmp) -> Pred = fun(SubSel) -> match(SubSel, Value, Cmp) end, lists:all(Pred, Args);