couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From robertkowal...@apache.org
Subject [44/50] [abbrv] couchdb-mango git commit: Support Text Index Creation
Date Tue, 03 Feb 2015 15:13:51 GMT
Support Text Index Creation

Add support for new index type based on Lucene text indexes. This feature
allows users to perform full text search and also improves our ability to
answer complex queries that were previously not possible.

33294-query-text-search


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mango/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mango/commit/aa4edf42
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mango/tree/aa4edf42
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mango/diff/aa4edf42

Branch: refs/heads/master
Commit: aa4edf42b736bb1a1ffa86248f0895d4ea028259
Parents: bb91429
Author: Tony Sun <llamage@Tonys-MacBook-Pro.local>
Authored: Thu Aug 7 13:11:09 2014 -0700
Committer: Paul J. Davis <paul.joseph.davis@gmail.com>
Committed: Fri Jan 16 13:41:29 2015 -0600

----------------------------------------------------------------------
 src/mango_cursor.erl                   |   7 +-
 src/mango_cursor_text.erl              | 301 +++++++++++++++
 src/mango_error.erl                    | 122 +++++-
 src/mango_fields.erl                   |   2 +
 src/mango_idx.erl                      |  12 +-
 src/mango_idx_text.erl                 | 256 +++++++++++++
 src/mango_native_proc.erl              | 182 +++++++++
 src/mango_opts.erl                     |  17 +
 src/mango_selector.erl                 |   5 +-
 src/mango_selector_text.erl            | 347 +++++++++++++++++
 src/mango_util.erl                     |  68 +++-
 test/02-basic-find-test.py             |   2 +-
 test/04-key-tests.py                   |  98 ++++-
 test/05-index-selection-test.py        |  50 +++
 test/06-basic-text-test.py             | 488 ++++++++++++++++++++++++
 test/06-text-default-field-test.py     |  70 ++++
 test/07-text-custom-field-list-test.py |  62 +++
 test/08-text-limit-test.py             | 134 +++++++
 test/09-text-sort-test.py              |  89 +++++
 test/friend_docs.py                    | 568 ++++++++++++++++++++++++++++
 test/limit_docs.py                     | 408 ++++++++++++++++++++
 test/mango.py                          |  41 +-
 test/user_docs.py                      |  14 +-
 23 files changed, 3286 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_cursor.erl
----------------------------------------------------------------------
diff --git a/src/mango_cursor.erl b/src/mango_cursor.erl
index 72ee8bb..545a863 100644
--- a/src/mango_cursor.erl
+++ b/src/mango_cursor.erl
@@ -119,8 +119,13 @@ group_indexes_by_type(Indexes) ->
     IdxDict = lists:foldl(fun(I, D) ->
         dict:append(mango_idx:cursor_mod(I), I, D)
     end, dict:new(), Indexes),
+    % The first cursor module that has indexes will be
+    % used to service this query. This is so that we
+    % don't suddenly switch indexes for existing client
+    % queries.
     CursorModules = [
-        mango_cursor_view
+        mango_cursor_view,
+        mango_cursor_text
     ],
     lists:flatmap(fun(CMod) ->
         case dict:find(CMod, IdxDict) of

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_cursor_text.erl
----------------------------------------------------------------------
diff --git a/src/mango_cursor_text.erl b/src/mango_cursor_text.erl
new file mode 100644
index 0000000..7c1b992
--- /dev/null
+++ b/src/mango_cursor_text.erl
@@ -0,0 +1,301 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mango_cursor_text).
+
+-export([
+    create/4,
+    explain/1,
+    execute/3
+]).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("dreyfus/include/dreyfus.hrl").
+-include("mango_cursor.hrl").
+-include("mango.hrl").
+
+
+-record(cacc, {
+    selector,
+    dbname,
+    ddocid,
+    idx_name,
+    query_args,
+    bookmark,
+    limit,
+    skip,
+    user_fun,
+    user_acc
+}).
+
+
+create(Db, Indexes, Selector, Opts0) -> % Build the #cursor{} used to run a text index query.
+    Index = case Indexes of
+        [Index0] ->
+            Index0;
+        _ -> % Anything other than exactly one index is reported as multiple_text_indexes.
+            ?MANGO_ERROR(multiple_text_indexes)
+    end,
+
+    Opts = unpack_bookmark(Db#db.name, Opts0), % Opts now carries the unpacked bookmark.
+
+    % Limit the result set size to 50 for Clouseau's
+    % sake. We may want to revisit this.
+    Limit0 = couch_util:get_value(limit, Opts, 50),
+    Limit = if Limit0 < 50 -> Limit0; true -> 50 end, % Limits of 50 or more are capped at 50.
+    Skip = couch_util:get_value(skip, Opts, 0),
+    Fields = couch_util:get_value(fields, Opts, all_fields),
+
+    {ok, #cursor{
+        db = Db,
+        index = Index,
+        ranges = null, % This cursor type stores no key ranges.
+        selector = Selector,
+        opts = Opts,
+        limit = Limit,
+        skip = Skip,
+        fields = Fields
+    }}.
+
+
+explain(Cursor) -> % Report the converted query and sort for this cursor without running the search.
+    #cursor{
+        selector = Selector,
+        opts = Opts
+    } = Cursor,
+    [
+        {'query', mango_selector_text:convert(Selector)}, % Same conversion execute/3 uses for q.
+        {sort, sort_query(Opts, Selector)} % Same sort spec execute/3 uses.
+    ].
+
+
+execute(Cursor, UserFun, UserAcc) -> % Run the search, feeding rows to UserFun; returns {ok, FinalUserAcc} once it stops.
+    #cursor{
+        db = Db,
+        index = Idx,
+        limit = Limit,
+        skip = Skip,
+        selector = Selector,
+        opts = Opts
+    } = Cursor,
+    QueryArgs = #index_query_args{
+        q = mango_selector_text:convert(Selector),
+        sort = sort_query(Opts, Selector),
+        raw_bookmark = true
+    },
+    CAcc = #cacc{ % Accumulator threaded through execute/1 and handle_hit/3.
+        selector = Selector,
+        dbname = Db#db.name,
+        ddocid = ddocid(Idx),
+        idx_name = mango_idx:name(Idx),
+        bookmark = get_bookmark(Opts),
+        limit = Limit,
+        skip = Skip,
+        query_args = QueryArgs,
+        user_fun = UserFun,
+        user_acc = UserAcc
+    },
+    try
+        execute(CAcc) % Loops until a {stop, CAcc} throw; see execute/1.
+    catch
+        throw:{stop, FinalCAcc} ->
+            #cacc{
+                bookmark = FinalBM,
+                user_fun = UserFun, % Already bound, so this asserts the callback is unchanged.
+                user_acc = LastUserAcc
+            } = FinalCAcc,
+            JsonBM = dreyfus_bookmark:pack(FinalBM),
+            Arg = {add_key, bookmark, JsonBM}, % Hand the final bookmark to the callback.
+            {_Go, FinalUserAcc} = UserFun(Arg, LastUserAcc),
+            {ok, FinalUserAcc}
+    end.
+
+
+execute(CAcc) -> % Fetch pages of hits until a {stop, CAcc} throw ends the loop.
+    case search_docs(CAcc) of
+        {ok, Bookmark, []} ->
+            % If we don't have any results from the
+            % query it means the request has paged through
+            % all possible results and the request is over.
+            NewCAcc = CAcc#cacc{bookmark = Bookmark},
+            throw({stop, NewCAcc});
+        {ok, Bookmark, Hits} ->
+            NewCAcc = CAcc#cacc{bookmark = Bookmark},
+            HitDocs = get_json_docs(CAcc#cacc.dbname, Hits), % Pair each hit with its document body.
+            {ok, FinalCAcc} = handle_hits(NewCAcc, HitDocs),
+            execute(FinalCAcc) % Request the next page with the updated bookmark, limit and skip.
+    end.
+
+
+search_docs(CAcc) -> % Issue one search request; returns {ok, Bookmark, Hits}.
+    #cacc{
+        dbname = DbName,
+        ddocid = DDocId,
+        idx_name = IdxName
+    } = CAcc,
+    QueryArgs = update_query_args(CAcc), % Refresh bookmark and limit for this request.
+    case dreyfus_fabric_search:go(DbName, DDocId, IdxName, QueryArgs) of
+        {ok, Bookmark, _, Hits, _, _} -> % Only the bookmark and hits are used here.
+            {ok, Bookmark, Hits};
+        {error, Reason} -> % Backend errors are reported as text_search_error.
+            ?MANGO_ERROR({text_search_error, {error, Reason}})
+    end.
+
+
+handle_hits(CAcc, []) -> % All hits consumed: return the accumulator.
+    {ok, CAcc};
+
+handle_hits(CAcc0, [{Sort, Doc} | Rest]) -> % Process the {Sort, Doc} pairs in order via handle_hit/3.
+    CAcc1 = handle_hit(CAcc0, Sort, Doc),
+    handle_hits(CAcc1, Rest).
+
+
+handle_hit(CAcc0, Sort, Doc) -> % Apply skip/limit bookkeeping to one candidate hit.
+    #cacc{
+        limit = Limit,
+        skip = Skip
+    } = CAcc0,
+    CAcc1 = update_bookmark(CAcc0, Sort), % Advance the bookmark past this hit.
+    case mango_selector:match(CAcc1#cacc.selector, Doc) of
+        true when Skip > 0 -> % Matching doc consumed by skip; not emitted.
+            CAcc1#cacc{skip = Skip - 1};
+        true when Limit == 0 ->
+            % We hit this case if the user specified a
+            % zero limit. Notice that in this case we need
+            % to return the bookmark from before this match
+            throw({stop, CAcc0});
+        true when Limit == 1 -> % Last row allowed by the limit: emit it and stop.
+            NewCAcc = apply_user_fun(CAcc1, Doc),
+            throw({stop, NewCAcc});
+        true when Limit > 1 ->
+            NewCAcc = apply_user_fun(CAcc1, Doc),
+            NewCAcc#cacc{limit = Limit - 1};
+        false -> % Non-matching docs only advance the bookmark.
+            CAcc1
+    end.
+
+
+apply_user_fun(CAcc, Doc) -> % Pass one {row, Doc} to the user callback and keep its new accumulator.
+    #cacc{
+        user_fun = UserFun,
+        user_acc = UserAcc
+    } = CAcc,
+    case UserFun({row, Doc}, UserAcc) of
+        {ok, NewUserAcc} ->
+            CAcc#cacc{user_acc = NewUserAcc};
+        {stop, NewUserAcc} -> % The callback asked to stop: unwind to the catch in execute/3.
+            throw({stop, CAcc#cacc{user_acc = NewUserAcc}})
+    end.
+
+
+%% Convert Query to Dreyfus sort specifications
+%% Convert <<"Field">>, <<"desc">> to <<"-Field">>
+%% and append to the dreyfus query
+sort_query(Opts, Selector) ->
+    {sort, {Sort}} = lists:keyfind(sort, 1, Opts), % Requires a sort option holding an {Props} object.
+    SortList = lists:map(fun(SortField) ->
+        RawSortField = case SortField of
+            {Field, <<"asc">>} -> Field;
+            {Field, <<"desc">>} -> <<"-", Field/binary>>;
+            Field when is_binary(Field) -> Field % Bare field name: used as is.
+        end,
+        mango_selector_text:append_sort_type(RawSortField, Selector)
+    end, Sort),
+    case SortList of
+        [] -> relevance; % No sort fields: fall back to the atom relevance.
+        _ -> SortList
+    end.
+
+
+get_bookmark(Opts) -> % Return the bookmark option if it is a non-empty list, else nil.
+    case lists:keyfind(bookmark, 1, Opts) of
+        {_, BM} when is_list(BM), BM /= [] ->
+            BM;
+        _ -> % Missing option, empty list, or any non-list value.
+            nil
+    end.
+
+
+update_bookmark(CAcc, Sortable) -> % Fold one hit's sortable into the accumulated bookmark.
+    BM = CAcc#cacc.bookmark,
+    QueryArgs = CAcc#cacc.query_args,
+    Sort = QueryArgs#index_query_args.sort, % The bookmark update is given the query's sort spec.
+    NewBM = dreyfus_bookmark:update(Sort, BM, [Sortable]),
+    CAcc#cacc{bookmark = NewBM}.
+
+
+pack_bookmark(Bookmark) -> % Pack a bookmark, mapping a packed value of null to nil.
+    case dreyfus_bookmark:pack(Bookmark) of
+        null -> nil;
+        Enc -> Enc
+    end.
+
+
+unpack_bookmark(DbName, Opts) -> % Replace the raw bookmark option with its unpacked form.
+    NewBM = case lists:keyfind(bookmark, 1, Opts) of % NOTE(review): no clause for keyfind returning false; assumes the option is always present - confirm.
+        {_, nil} -> % nil unpacks to the empty bookmark [].
+            [];
+        {_, Bin} ->
+            try
+                dreyfus_bookmark:unpack(DbName, Bin)
+            catch _:_ -> % Any failure while unpacking is reported as invalid_bookmark.
+                ?MANGO_ERROR({invalid_bookmark, Bin})
+            end
+    end,
+    lists:keystore(bookmark, 1, Opts, {bookmark, NewBM}).
+
+
+ddocid(Idx) -> % Return the index's design doc id without a leading "_design/" prefix.
+    case mango_idx:ddoc(Idx) of
+        <<"_design/", Rest/binary>> ->
+            Rest;
+        Else -> % No prefix to strip: returned unchanged.
+            Else
+    end.
+
+
+update_query_args(CAcc) -> % Refresh the query args with the current bookmark and batch limit.
+    #cacc{
+        bookmark = Bookmark,
+        query_args = QueryArgs
+    } = CAcc,
+    QueryArgs#index_query_args{
+        bookmark = pack_bookmark(Bookmark),
+        limit = get_limit(CAcc) % Batch size for this request, see get_limit/1.
+    }.
+
+
+get_limit(CAcc) -> % Batch size for one search request: limit + skip, clamped to 25..100.
+    Total = CAcc#cacc.limit + CAcc#cacc.skip,
+    if
+        Total < 25 -> 25;
+        Total > 100 -> 100;
+        true -> Total
+    end.
+
+
+get_json_docs(DbName, Hits) -> % Returns [{Sortable, Doc | not_found}] in the order of Hits.
+    Ids = lists:map(fun(#sortable{item = Item}) ->
+        couch_util:get_value(<<"_id">>, Item#hit.fields)
+    end, Hits),
+    {ok, IdDocs} = dreyfus_fabric:get_json_docs(DbName, Ids), % A single call fetches the bodies for all ids.
+    lists:map(fun(#sortable{item = Item} = Sort) ->
+        Id = couch_util:get_value(<<"_id">>, Item#hit.fields),
+        case lists:keyfind(Id, 1, IdDocs) of
+            {Id, {doc, Doc}} ->
+                {Sort, Doc};
+            false -> % No entry came back for this id.
+                {Sort, not_found}
+        end
+    end, Hits).
+

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_error.erl
----------------------------------------------------------------------
diff --git a/src/mango_error.erl b/src/mango_error.erl
index 778df2d..8aebfb9 100644
--- a/src/mango_error.erl
+++ b/src/mango_error.erl
@@ -13,44 +13,63 @@
 -module(mango_error).
 
 
+-include_lib("couch/include/couch_db.hrl").
+
+
 -export([
     info/2
 ]).
 
 
-info(mango_cursor, {no_usable_index, operator_unsupported}) ->
+info(mango_cursor, {no_usable_index, no_indexes_defined}) ->
     {
         400,
         <<"no_usable_index">>,
-        <<"There is no operator in this selector can used with an index.">>
+        <<"There are no indexes defined in this database.">>
     };
-info(mango_cursor, {no_usable_index, selector_unsupported}) ->
+info(mango_cursor, {no_usable_index, no_index_matching_name}) ->
     {
         400,
         <<"no_usable_index">>,
-        <<"There is no index available for this selector.">>
+        <<"No index matches the index specified with \"use_index\"">>
     };
-info(mango_cursor, {no_usable_index, sort_field}) ->
+info(mango_cursor, {no_usable_index, missing_sort_index}) ->
     {
         400,
         <<"no_usable_index">>,
-        <<"No index can satisfy both the selector and sort specified.">>
+        <<"No index exists for this sort, try indexing by the sort fields.">>
     };
-info(mango_cursor, {no_usable_index, {sort, Fields}}) ->
-    S0 = [binary_to_list(F) || F <- Fields],
-    S1 = string:join(S0, ", "),
+info(mango_cursor, {no_usable_index, selector_unsupported}) ->
     {
         400,
         <<"no_usable_index">>,
-        fmt("No index exists for this sort, try indexing: ~s", [S1])
+        <<"There is no index available for this selector.">>
+    };
+
+info(mango_cursor_text, {invalid_bookmark, BadBookmark}) -> % 400 response for a bookmark that could not be unpacked.
+    {
+        400,
+        <<"invalid_bookmark">>,
+        fmt("Invalid bookmark value: ~s", [?JSON_ENCODE(BadBookmark)])
+    };
-info(mango_cursor, {no_usable_index, {fields, Possible}}) ->
-    S0 = [binary_to_list(P) || P <- Possible],
-    S1 = string:join(S0, ", "),
+info(mango_cursor_text, multiple_text_indexes) ->
     {
         400,
-        <<"no_usable_index">>,
-        fmt("No index exists for this selector, try indexing one of: ~s", [S1])
+        <<"multiple_text_indexes">>,
+        <<"You must specify an index with the `use_index` parameter.">>
+    };
+info(mango_cursor_text, {text_search_error, {error, {bad_request, Msg}}}) 
+        when is_binary(Msg) ->
+    {
+        400,
+        <<"text_search_error">>,
+        Msg
+    };
+info(mango_cursor_text, {text_search_error, {error, Error}}) ->
+    {
+        400,
+        <<"text_search_error">>,
+        fmt("Error performing text search: ~p", [Error])
     };
 
 info(mango_fields, {invalid_fields_json, BadFields}) ->
@@ -88,23 +107,48 @@ info(mango_httpd, {error_saving_ddoc, Reason}) ->
 info(mango_idx, {invalid_index_type, BadType}) ->
     {
         400,
-        <<"invalid_index_type">>,
+        <<"invalid_index">>,
         fmt("Invalid type for index: ~s", [BadType])
     };
+info(mango_idx, invalid_query_ddoc_language) ->
+    {
+        400,
+        <<"invalid_index">>,
+        <<"Invalid design document query language.">>
+    };
+info(mango_idx, no_index_definition) ->
+    {
+        400,
+        <<"invalid_index">>,
+        <<"Index is missing its definition.">>
+    };
 
 info(mango_idx_view, {invalid_index_json, BadIdx}) ->
     {
         400,
-        <<"invalid_index_json">>,
+        <<"invalid_index">>,
         fmt("JSON indexes must be an object, not: ~w", [BadIdx])
     };
 info(mango_idx_view, {index_not_found, BadIdx}) ->
     {
         404,
-        <<"index_not_found">>,
+        <<"invalid_index">>,
         fmt("JSON index ~s not found in this design doc.", [BadIdx])
     };
 
+info(mango_idx_text, {invalid_index_text, BadIdx}) ->
+    {
+        400,
+        <<"invalid_index">>,
+        fmt("Text indexes must be an object, not: ~w", [BadIdx])
+    };
+info(mango_idx_text, {index_not_found, BadIdx}) ->
+    {
+        404,
+        <<"index_not_found">>,
+        fmt("Text index ~s not found in this design doc.", [BadIdx])
+    };
+
 info(mango_opts, {invalid_ejson, Val}) ->
     {
         400,
@@ -171,6 +215,20 @@ info(mango_opts, {invalid_selector_json, BadSel}) ->
         <<"invalid_selector_json">>,
         fmt("Selector must be a JSON object, not: ~w", [BadSel])
     };
+info(mango_opts, {invalid_index_name, BadName}) ->
+    {
+        400,
+        <<"invalid_index_name">>,
+        fmt("Invalid index name: ~w", [BadName])
+    };
+
+info(mango_opts, {multiple_text_operator, {invalid_selector, BadSel}}) ->
+    {
+        400,
+        <<"multiple_text_selector">>,
+        fmt("Selector cannot contain more than one $text operator: ~w",
+            [BadSel])
+    };
 
 info(mango_selector, {invalid_selector, missing_field_name}) ->
     {
@@ -203,6 +261,22 @@ info(mango_selector, {bad_field, BadSel}) ->
         fmt("Invalid field normalization on selector: ~w", [BadSel])
     };
 
+info(mango_selector_text, {invalid_operator, Op}) ->
+    {
+        400,
+        <<"invalid_operator">>,
+        fmt("Invalid text operator: ~s", [Op])
+    };
+info(mango_selector_text, {text_sort_error, Field}) ->
+    S = binary_to_list(Field),
+    Msg = "Unspecified or ambiguous sort type. Try appending :number or"
+        " :string to the sort field. ~s",
+    {
+        400,
+        <<"text_sort_error">>,
+        fmt(Msg, [S])
+    };
+
 info(mango_sort, {invalid_sort_json, BadSort}) ->
     {
         400,
@@ -228,6 +302,18 @@ info(mango_sort, {unsupported, mixed_sort}) ->
         <<"Sorts currently only support a single direction for all fields.">>
     };
 
+info(mango_util, {error_loading_doc, DocId}) ->
+    {
+        500,
+        <<"internal_error">>,
+        fmt("Error loading doc: ~s", [DocId])
+    };
+info(mango_util, error_loading_ddocs) ->
+    {
+        500,
+        <<"internal_error">>,
+        <<"Error loading design documents">>
+    };
 info(mango_util, {invalid_ddoc_lang, Lang}) ->
     {
         400,

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_fields.erl
----------------------------------------------------------------------
diff --git a/src/mango_fields.erl b/src/mango_fields.erl
index 46049af..8b6a00b 100644
--- a/src/mango_fields.erl
+++ b/src/mango_fields.erl
@@ -49,5 +49,7 @@ extract(Doc, Fields) ->
 
 field(Val) when is_binary(Val) ->
     Val;
+field({Val}) when is_list(Val) ->
+    {Val};
 field(Else) ->
     ?MANGO_ERROR({invalid_field_json, Else}).

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_idx.erl
----------------------------------------------------------------------
diff --git a/src/mango_idx.erl b/src/mango_idx.erl
index 902fb75..8e644c3 100644
--- a/src/mango_idx.erl
+++ b/src/mango_idx.erl
@@ -144,7 +144,7 @@ from_ddoc(Db, {Props}) ->
             ?MANGO_ERROR(invalid_query_ddoc_language)
     end,
 
-    IdxMods = [mango_idx_view],
+    IdxMods = [mango_idx_view, mango_idx_text],
     Idxs = lists:flatmap(fun(Mod) -> Mod:from_ddoc({Props}) end, IdxMods),
     lists:map(fun(Idx) ->
         Idx#idx{
@@ -218,13 +218,17 @@ end_key(#idx{}=Idx, Ranges) ->
 cursor_mod(#idx{type = <<"json">>}) ->
     mango_cursor_view;
 cursor_mod(#idx{def = all_docs, type= <<"special">>}) ->
-    mango_cursor_view.
+    mango_cursor_view;
+cursor_mod(#idx{type = <<"text">>}) ->
+    mango_cursor_text.
 
 
 idx_mod(#idx{type = <<"json">>}) ->
     mango_idx_view;
 idx_mod(#idx{type = <<"special">>}) ->
-    mango_idx_special.
+    mango_idx_special;
+idx_mod(#idx{type = <<"text">>}) ->
+    mango_idx_text.
 
 
 db_to_name(#db{name=Name}) ->
@@ -247,7 +251,7 @@ get_idx_def(Opts) ->
 get_idx_type(Opts) ->
     case proplists:get_value(type, Opts) of
         <<"json">> -> <<"json">>;
-        %<<"text">> -> <<"text">>;
+        <<"text">> -> <<"text">>;
         %<<"geo">> -> <<"geo">>;
         undefined -> <<"json">>;
         BadType ->

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_idx_text.erl
----------------------------------------------------------------------
diff --git a/src/mango_idx_text.erl b/src/mango_idx_text.erl
new file mode 100644
index 0000000..507e0c2
--- /dev/null
+++ b/src/mango_idx_text.erl
@@ -0,0 +1,256 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mango_idx_text).
+
+
+-export([
+    validate/1,
+    add/2,
+    remove/2,
+    from_ddoc/1,
+    to_json/1,
+    columns/1,
+    is_usable/2,
+    get_default_field_options/1
+]).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include("mango.hrl").
+-include("mango_idx.hrl").
+
+
+validate(#idx{}=Idx) -> % Validate the definition and return the index with the def from do_validate/1.
+    {ok, Def} = do_validate(Idx#idx.def),
+    {ok, Idx#idx{def=Def}}.
+
+
+add(#doc{body={Props0}}=DDoc, Idx) -> % Store Idx in the design doc's "indexes" object.
+    Texts1 = case proplists:get_value(<<"indexes">>, Props0) of
+        {Texts0} -> Texts0;
+        _ -> [] % No usable "indexes" object yet: start from an empty list.
+    end,
+    NewText = make_text(Idx),
+    Texts2 = lists:keystore(element(1, NewText), 1, Texts1, NewText), % Replaces an entry with the same name.
+    Props1 = lists:keystore(<<"indexes">>, 1, Props0, {<<"indexes">>,
+        {Texts2}}),
+    {ok, DDoc#doc{body={Props1}}}.
+
+
+remove(#doc{body={Props0}}=DDoc, Idx) -> % Delete Idx from the design doc's "indexes" object.
+    Texts1 = case proplists:get_value(<<"indexes">>, Props0) of
+        {Texts0} ->
+            Texts0;
+        _ -> % No "indexes" object at all.
+            ?MANGO_ERROR({index_not_found, Idx#idx.name})
+    end,
+    Texts2 = lists:keydelete(Idx#idx.name, 1, Texts1),
+    if Texts2 /= Texts1 -> ok; true -> % An unchanged list means the name was not present.
+        ?MANGO_ERROR({index_not_found, Idx#idx.name})
+    end,
+    Props1 = case Texts2 of
+        [] -> % Last index removed: drop the "indexes" key entirely.
+            lists:keydelete(<<"indexes">>, 1, Props0);
+        _ ->
+            lists:keystore(<<"indexes">>, 1, Props0, {<<"indexes">>, {Texts2}})
+    end,
+    {ok, DDoc#doc{body={Props1}}}.
+
+
+from_ddoc({Props}) -> % Build one text #idx{} per entry of the design doc's "indexes" object.
+    case lists:keyfind(<<"indexes">>, 1, Props) of
+        {<<"indexes">>, {Texts}} when is_list(Texts) ->
+            lists:flatmap(fun({Name, {VProps}}) ->
+                Def = proplists:get_value(<<"index">>, VProps), % undefined when the entry has no "index" key.
+                I = #idx{
+                    type = <<"text">>,
+                    name = Name,
+                    def = Def
+                },
+                % TODO: Validate the index definition
+                [I]
+            end, Texts);
+        _ -> % No "indexes" object: this design doc contributes no text indexes.
+            []
+    end.
+
+
+to_json(Idx) -> % Render the index as an EJSON object with ddoc, name, type and def.
+    {[
+        {ddoc, Idx#idx.ddoc},
+        {name, Idx#idx.name},
+        {type, Idx#idx.type},
+        {def, {def_to_json(Idx#idx.def)}}
+    ]}.
+
+
+columns(Idx) -> % Returns all_fields, or the list of "name:type" columns this index covers.
+    {Props} = Idx#idx.def,
+    {<<"fields">>, Fields} = lists:keyfind(<<"fields">>, 1, Props), % Requires a binary "fields" key in the def.
+    case Fields of
+        <<"all_fields">> ->
+            all_fields;
+        _ ->
+            {DFProps} = couch_util:get_value(<<"default_field">>, Props, {[]}),
+            Enabled = couch_util:get_value(<<"enabled">>, DFProps, true),
+            Default = case Enabled of
+                true -> [<<"$default">>]; % The default field counts as a column when enabled.
+                false -> []
+            end,
+            Default ++ lists:map(fun({FProps}) ->
+                {_, Name} = lists:keyfind(<<"name">>, 1, FProps),
+                {_, Type} = lists:keyfind(<<"type">>, 1, FProps),
+                iolist_to_binary([Name, ":", Type])
+            end, Fields)
+    end.
+
+
+is_usable(Idx, Selector) -> % True when every field the selector needs is covered by this index.
+    case columns(Idx) of
+        all_fields -> % An all_fields index is usable for any selector.
+            true;
+        Cols ->
+            Fields = indexable_fields(Selector),
+            sets:is_subset(sets:from_list(Fields), sets:from_list(Cols))
+    end.
+
+
+do_validate({Props}) -> % Validate an object definition against opts/0.
+    {ok, Opts} = mango_opts:validate(Props, opts()),
+    {ok, {Opts}};
+do_validate(Else) -> % Anything that is not an {Props} object is rejected.
+    ?MANGO_ERROR({invalid_index_text, Else}).
+
+
+def_to_json({Props}) -> % Convert an index definition to a JSON-ready property list.
+    def_to_json(Props);
+def_to_json([]) ->
+    [];
+def_to_json([{<<"fields">>, <<"all_fields">>} | Rest]) -> % all_fields is rendered as an empty field list.
+    [{<<"fields">>, []} | def_to_json(Rest)];
+def_to_json([{fields, Fields} | Rest]) -> % Atom and binary "fields" keys are handled the same way.
+    [{<<"fields">>, mango_sort:to_json(Fields)} | def_to_json(Rest)];
+def_to_json([{<<"fields">>, Fields} | Rest]) ->
+    [{<<"fields">>, mango_sort:to_json(Fields)} | def_to_json(Rest)];
+def_to_json([{Key, Value} | Rest]) -> % Every other property passes through unchanged.
+    [{Key, Value} | def_to_json(Rest)].
+
+
+opts() -> % Option spec handed to mango_opts:validate/2 by do_validate/1.
+    [
+        {<<"default_analyzer">>, [
+            {tag, default_analyzer},
+            {optional, true},
+            {default, <<"keyword">>}
+        ]},
+        {<<"default_field">>, [
+            {tag, default_field},
+            {optional, true},
+            {default, {[]}}
+        ]},
+         {<<"selector">>, [
+            {tag, selector},
+            {optional, true},
+            {default, {[]}},
+            {validator, fun mango_opts:validate_selector/1}
+        ]},
+        {<<"fields">>, [
+            {tag, fields},
+            {optional, true},
+            {default, []},
+            {validator, fun mango_opts:validate_fields/1}
+        ]}
+    ].
+
+
+make_text(Idx) -> % Build the {Name, Object} entry stored under "indexes" by add/2.
+    Text= {[
+        {<<"index">>, Idx#idx.def},
+        {<<"analyzer">>, construct_analyzer(Idx#idx.def)}
+    ]},
+    {Idx#idx.name, Text}.
+
+
+get_default_field_options(Props) -> % Returns {Enabled, Analyzer} for the default field.
+    Default = couch_util:get_value(default_field, Props, {[]}),
+    case Default of
+        Bool when is_boolean(Bool) -> % A bare boolean toggles the field and keeps the standard analyzer.
+            {Bool, <<"standard">>};
+        {[]} -> % Empty object: enabled with the standard analyzer.
+            {true, <<"standard">>};
+        {Opts}->
+            Enabled = couch_util:get_value(<<"enabled">>, Opts, true),
+            Analyzer = couch_util:get_value(<<"analyzer">>, Opts,
+                <<"standard">>),
+            {Enabled, Analyzer}
+    end.
+
+
+construct_analyzer({Props}) -> % Build the "analyzer" value stored with the index by make_text/1.
+    DefaultAnalyzer = couch_util:get_value(default_analyzer, Props,
+        <<"keyword">>),
+    {DefaultField, DefaultFieldAnalyzer} = get_default_field_options(Props),
+    DefaultAnalyzerDef = case DefaultField of
+        true ->
+            [{<<"$default">>, DefaultFieldAnalyzer}];
+        _ ->
+            []
+    end,
+    case DefaultAnalyzerDef of
+        [] -> % NOTE(review): this branch ignores the configured default_analyzer - confirm intended.
+            <<"keyword">>;
+        _ -> % Per-field analyzer object: $default gets its own analyzer, DefaultAnalyzer is the "default" entry.
+            {[
+                {<<"name">>, <<"perfield">>},
+                {<<"default">>, DefaultAnalyzer},
+                {<<"fields">>, {DefaultAnalyzerDef}}
+            ]}
+    end.
+
+
+indexable_fields(Selector) -> % List the index field names a selector refers to.
+    TupleTree = mango_selector_text:convert([], Selector),
+    indexable_fields([], TupleTree).
+
+
+indexable_fields(Fields, {op_and, Args}) when is_list(Args) -> % Accumulate field names from every operand.
+    lists:foldl(fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
+        Fields, Args);
+
+indexable_fields(Fields, {op_or, Args}) when is_list(Args) -> % Handled exactly like op_and.
+    lists:foldl(fun(Arg, Fields0) -> indexable_fields(Fields0, Arg) end,
+        Fields, Args);
+
+indexable_fields(Fields, {op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) -> % Collect from both parts of the negation.
+    Fields0 = indexable_fields(Fields, ExistsQuery),
+    indexable_fields(Fields0, Arg);
+
+indexable_fields(Fields, {op_insert, Arg}) when is_binary(Arg) -> % Contributes no field names.
+    Fields;
+
+indexable_fields(Fields, {op_field, {Name, _}}) -> % Name may be an iolist; it is flattened to a binary.
+    [iolist_to_binary(Name) | Fields];
+
+%% In this particular case, the lucene index is doing a field_exists query
+%% so it is looking at all sorts of combinations of field:* and field.*
+%% We don't add the field because we cannot pre-determine what field will exist.
+%% Hence we just return Fields and make it less restrictive.
+indexable_fields(Fields, {op_fieldname, {_, _}}) ->
+    Fields;
+
+%% Similar idea to op_fieldname but with fieldname:null
+indexable_fields(Fields, {op_null, {_, _}}) ->
+    Fields;
+
+indexable_fields(Fields, {op_default, _}) -> % Default-field queries need the $default column.
+    [<<"$default">> | Fields].

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_native_proc.erl
----------------------------------------------------------------------
diff --git a/src/mango_native_proc.erl b/src/mango_native_proc.erl
index 636da5c..3e189bd 100644
--- a/src/mango_native_proc.erl
+++ b/src/mango_native_proc.erl
@@ -36,6 +36,12 @@
 }).
 
 
+-record(tacc, {
+    fields = all_fields,
+    path = []
+}).
+
+
 start_link() ->
     gen_server:start_link(?MODULE, [], []).
 
@@ -79,6 +85,9 @@ handle_call({prompt, [<<"reduce">>, _, _]}, _From, St) ->
 handle_call({prompt, [<<"rereduce">>, _, _]}, _From, St) ->
     {reply, null, St};
 
+handle_call({prompt, [<<"index_doc">>, Doc]}, _From, St) ->
+    {reply, index_doc(St, mango_json:to_binary(Doc)), St};
+
 handle_call(Msg, _From, St) ->
     {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}.
 
@@ -103,6 +112,10 @@ map_doc(#st{indexes=Indexes}, Doc) ->
     lists:map(fun(Idx) -> get_index_entries(Idx, Doc) end, Indexes).
 
 
+index_doc(#st{indexes=Indexes}, Doc) -> % One list of text entries per index held in the state.
+    lists:map(fun(Idx) -> get_text_entries(Idx, Doc) end, Indexes).
+
+
 get_index_entries({IdxProps}, Doc) ->
     {Fields} = couch_util:get_value(<<"fields">>, IdxProps),
     Values = lists:map(fun({Field, _Dir}) ->
@@ -118,3 +131,172 @@ get_index_entries({IdxProps}, Doc) ->
         false ->
             [[Values, null]]
     end.
+
+
+%% Return the text index entries for Doc under the index described by
+%% IdxProps, or [] when should_index/2 rejects the document.
+get_text_entries({IdxProps}, Doc) ->
+    % An empty list stored as the selector is treated as the empty
+    % object selector.
+    Selector = case couch_util:get_value(<<"selector">>, IdxProps) of
+        [] -> {[]};
+        Configured -> Configured
+    end,
+    Indexable = should_index(Selector, Doc),
+    if
+        Indexable -> get_text_entries0(IdxProps, Doc);
+        true -> []
+    end.
+
+
+%% Build the full entry list for a document that is known to be
+%% indexable: the $fieldnames entries first, followed by one
+%% [Name, Value, []] entry per collected field.
+get_text_entries0(IdxProps, Doc) ->
+    DefaultEnabled = get_default_enabled(IdxProps),
+    TAcc = #tacc{fields = get_text_field_list(IdxProps)},
+    DocFields = get_text_field_values(Doc, TAcc),
+    % Only an explicit `false` switches the $default field off.
+    Fields = case DefaultEnabled of
+        false -> DocFields;
+        _ -> add_default_text_field(DocFields)
+    end,
+    get_field_names(Fields, []) ++ convert_text_fields(Fields).
+
+
+%% Flatten a JSON value into a list of {EscapedFieldName, Type, Value}
+%% tuples. Objects and arrays are walked recursively; scalars yield at
+%% most one field through make_text_field/3.
+get_text_field_values({Members}, TAcc) when is_list(Members) ->
+    get_text_field_values_obj(Members, TAcc, []);
+
+get_text_field_values(Elements, TAcc) when is_list(Elements) ->
+    ArrTAcc = TAcc#tacc{path = ["[]" | TAcc#tacc.path]},
+    % The length field name is built with make_text_field_name/2
+    % directly instead of make_text_field/3 because "length" is not
+    % part of the path.
+    LengthName = make_text_field_name(ArrTAcc#tacc.path, <<"length">>),
+    EscapedLengthName = mango_util:lucene_escape_field(LengthName),
+    LengthField = {EscapedLengthName, <<"length">>, length(Elements)},
+    get_text_field_values_arr(Elements, ArrTAcc, [LengthField]);
+
+get_text_field_values(Str, TAcc) when is_binary(Str) ->
+    make_text_field(TAcc, <<"string">>, Str);
+
+get_text_field_values(Number, TAcc) when is_number(Number) ->
+    make_text_field(TAcc, <<"number">>, Number);
+
+get_text_field_values(Flag, TAcc) when is_boolean(Flag) ->
+    make_text_field(TAcc, <<"boolean">>, Flag);
+
+% null is indexed as a field of type "null" whose value is `true`.
+get_text_field_values(null, TAcc) ->
+    make_text_field(TAcc, <<"null">>, true).
+
+
+%% Collect the fields of every member of an object. Each member is
+%% visited with its key pushed onto the path; the fields of later
+%% members end up in front of those of earlier ones.
+get_text_field_values_obj(Props, TAcc, FAcc) ->
+    lists:foldl(fun({Key, Val}, Acc) ->
+        MemberTAcc = TAcc#tacc{path = [Key | TAcc#tacc.path]},
+        get_text_field_values(Val, MemberTAcc) ++ Acc
+    end, FAcc, Props).
+
+
+%% Collect the fields of every element of an array. All elements share
+%% the same TAcc (the caller has already pushed "[]" onto the path).
+get_text_field_values_arr(Values, TAcc, FAcc) ->
+    lists:foldl(fun(Value, Acc) ->
+        get_text_field_values(Value, TAcc) ++ Acc
+    end, FAcc, Values).
+
+
+%% Read the "default_field" index option. It may be a plain boolean, or
+%% an object whose "enabled" member is used (defaulting to true). A
+%% missing or empty object means enabled.
+get_default_enabled(Props) ->
+    DefaultField = couch_util:get_value(<<"default_field">>, Props, {[]}),
+    case DefaultField of
+        {[]} ->
+            true;
+        {Settings} ->
+            couch_util:get_value(<<"enabled">>, Settings, true);
+        Flag when is_boolean(Flag) ->
+            Flag
+    end.
+
+
+%% Prepend a $default copy of every string field to Fields.
+add_default_text_field(Fields) ->
+    add_default_text_field(Fields, []) ++ Fields.
+
+
+%% Push a {<<"$default">>, <<"string">>, Value} tuple onto Acc for each
+%% string field; non-string fields are ignored. Because entries are
+%% pushed, they come out in reverse field order.
+add_default_text_field(Fields, Acc) ->
+    lists:foldl(fun
+        ({_Name, <<"string">>, Value}, DefaultAcc) ->
+            [{<<"$default">>, <<"string">>, Value} | DefaultAcc];
+        (_, DefaultAcc) ->
+            DefaultAcc
+    end, Acc, Fields).
+
+
+%% Build the $fieldnames entries: one [<<"$fieldnames">>, Name, []] per
+%% distinct field name, pushed onto FAcc in the order the names are
+%% first seen (so the last new name ends up at the head).
+get_field_names(Fields, FAcc) ->
+    lists:foldl(fun({Name, _Type, _Value}, Acc) ->
+        Entry = [<<"$fieldnames">>, Name, []],
+        case lists:member(Entry, Acc) of
+            true -> Acc;
+            false -> [Entry | Acc]
+        end
+    end, FAcc, Fields).
+
+
+%% Turn each {Name, Type, Value} field into the [Name, Value, []]
+%% entry form; the type is dropped because it is already encoded in
+%% the field name.
+convert_text_fields(Fields) ->
+    lists:map(fun({Name, _Type, Value}) -> [Name, Value, []] end, Fields).
+
+
+%% A document is indexed when it matches the index selector and is not
+%% a design document.
+should_index(Selector, Doc) ->
+    Matches = mango_selector:match(mango_selector:normalize(Selector), Doc),
+    NotDesign = case mango_doc:get_field(Doc, <<"_id">>) of
+        <<"_design/", _/binary>> -> false;
+        _ -> true
+    end,
+    Matches and NotDesign.
+
+
+%% Return the list of <<"name:type">> binaries configured under
+%% "fields", or all_fields when "fields" is not a list.
+get_text_field_list(IdxProps) ->
+    Configured = couch_util:get_value(<<"fields">>, IdxProps),
+    if
+        is_list(Configured) ->
+            lists:flatmap(fun get_text_field_info/1, Configured);
+        true ->
+            all_fields
+    end.
+
+
+%% Convert one field declaration into [<<"name:type">>]. Declarations
+%% whose "name" is not a binary contribute nothing.
+get_text_field_info({Props}) ->
+    Name = couch_util:get_value(<<"name">>, Props),
+    DeclaredType = couch_util:get_value(<<"type">>, Props),
+    case is_binary(Name) of
+        true ->
+            Type = get_text_field_type(DeclaredType),
+            [iolist_to_binary([Name, ":", Type])];
+        false ->
+            []
+    end.
+
+
+%% Normalize a declared field type: "number" and "boolean" are kept,
+%% anything else (including a missing type) is treated as "string".
+get_text_field_type(Type) when Type =:= <<"number">>; Type =:= <<"boolean">> ->
+    Type;
+get_text_field_type(_) ->
+    <<"string">>.
+
+
+%% Emit a single {EscapedName, Type, Value} field for the value at the
+%% current path, unless a field list is configured and does not
+%% contain the unescaped "path:type" name.
+make_text_field(TAcc, Type, Value) ->
+    FieldName = make_text_field_name(TAcc#tacc.path, Type),
+    Wanted = case TAcc#tacc.fields of
+        all_fields -> true;
+        Allowed -> lists:member(FieldName, Allowed)
+    end,
+    if
+        Wanted ->
+            [{mango_util:lucene_escape_field(FieldName), Type, Value}];
+        true ->
+            []
+    end.
+
+
+%% Build the binary field name "a.b.c:Type" from a path stored
+%% innermost-first ([c, b, a]).
+make_text_field_name([Leaf | Ancestors], Type) ->
+    make_text_field_name0(Ancestors, [Leaf, ":", Type]).
+
+%% Prefix Name with each remaining path part followed by a dot, then
+%% flatten the result to a binary.
+make_text_field_name0(Ancestors, Name) ->
+    Prefixed = lists:foldl(fun(Part, Acc) ->
+        [Part, "." | Acc]
+    end, Name, Ancestors),
+    iolist_to_binary(Prefixed).

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_opts.erl
----------------------------------------------------------------------
diff --git a/src/mango_opts.erl b/src/mango_opts.erl
index e15a446..f7874a6 100644
--- a/src/mango_opts.erl
+++ b/src/mango_opts.erl
@@ -29,6 +29,7 @@
     validate_idx_name/1,
     validate_selector/1,
     validate_use_index/1,
+    validate_bookmark/1,
     validate_sort/1,
     validate_fields/1
 ]).
@@ -82,6 +83,12 @@ validate_find({Props}) ->
             {default, []},
             {validator, fun validate_use_index/1}
         ]},
+        {<<"bookmark">>, [
+            {tag, bookmark},
+            {optional, true},
+            {default, <<>>},
+            {validator, fun validate_bookmark/1}
+        ]},
         {<<"limit">>, [
             {tag, limit},
             {optional, true},
@@ -211,6 +218,16 @@ validate_use_index(Else) ->
     ?MANGO_ERROR({invalid_index_name, Else}).
 
 
+%% Validate the "bookmark" option: null and the empty binary both mean
+%% "no bookmark" (nil); any other binary is passed through; everything
+%% else is an invalid_bookmark error.
+validate_bookmark(Empty) when Empty =:= null; Empty =:= <<>> ->
+    {ok, nil};
+validate_bookmark(Bookmark) when is_binary(Bookmark) ->
+    {ok, Bookmark};
+validate_bookmark(Else) ->
+    ?MANGO_ERROR({invalid_bookmark, Else}).
+
+
 validate_sort(Value) ->
     mango_sort:new(Value).
 

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_selector.erl
----------------------------------------------------------------------
diff --git a/src/mango_selector.erl b/src/mango_selector.erl
index 56f2072..c008a4c 100644
--- a/src/mango_selector.erl
+++ b/src/mango_selector.erl
@@ -348,6 +348,9 @@ negate({[{<<"$and">>, Args}]}) ->
 negate({[{<<"$or">>, Args}]}) ->
     {[{<<"$and">>, [negate(A) || A <- Args]}]};
 
+negate({[{<<"$default">>, _}]} = Arg) ->
+    ?MANGO_ERROR({bad_arg, '$not', Arg});
+
 % Negating comparison operators is straight forward
 negate({[{<<"$lt">>, Arg}]}) ->
     {[{<<"$gte">>, Arg}]};
@@ -514,4 +517,4 @@ match({[{Field, Cond}]}, Value, Cmp) ->
     end;
 
 match({Props} = Sel, _Value, _Cmp) when length(Props) > 1 ->
-    erlang:error({unnormalized_selector, Sel}).
\ No newline at end of file
+    erlang:error({unnormalized_selector, Sel}).

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_selector_text.erl
----------------------------------------------------------------------
diff --git a/src/mango_selector_text.erl b/src/mango_selector_text.erl
new file mode 100644
index 0000000..35a0d4c
--- /dev/null
+++ b/src/mango_selector_text.erl
@@ -0,0 +1,347 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mango_selector_text).
+
+
+-export([
+    convert/1,
+    convert/2,
+
+    append_sort_type/2
+]).
+
+
+-include_lib("couch/include/couch_db.hrl").
+-include("mango.hrl").
+
+
+%% Convert a selector into a Lucene query binary: build the tuple tree
+%% starting from an empty path, render it, and flatten the iolist.
+convert(Object) ->
+    iolist_to_binary(to_query(convert([], Object))).
+
+
+% Convert a selector into an intermediate tuple tree ({op_and, ...},
+% {op_or, ...}, {op_not, ...}, {op_field, ...}, ...) that to_query/1
+% renders as a Lucene query. Path holds the field path parts seen so
+% far in reverse order (innermost part first). Each selector object is
+% expected to carry exactly one key; objects with more than one key
+% raise unnormalized_selector.
+convert(Path, {[{<<"$and">>, Args}]}) ->
+    Parts = [convert(Path, Arg) || Arg <- Args],
+    {op_and, Parts};
+convert(Path, {[{<<"$or">>, Args}]}) ->
+    Parts = [convert(Path, Arg) || Arg <- Args],
+    {op_or, Parts};
+convert(Path, {[{<<"$not">>, Arg}]}) ->
+    {op_not, {field_exists_query(Path), convert(Path, Arg)}};
+% $default reuses the query part of the converted argument and drops
+% its field name; the argument must convert to an op_field.
+convert(Path, {[{<<"$default">>, Arg}]}) ->
+    {op_field, {_, Query}} = convert(Path, Arg),
+    {op_default, Query};
+
+% The $text operator specifies a Lucene syntax query
+% so we just pull it in directly.
+convert(Path, {[{<<"$text">>, Query}]}) when is_binary(Query) ->
+    {op_field, {make_field(Path, Query), value_str(Query)}};
+
+% The MongoDB docs for $all are super confusing and read more
+% like they screwed up the implementation of this operator
+% and then just documented it as a feature.
+%
+% This implementation will match the behavior as closely as
+% possible based on the available docs but we'll need to have
+% the testing team validate how MongoDB handles edge conditions
+convert(Path, {[{<<"$all">>, Args}]}) ->
+    case Args of
+        [Values] when is_list(Values) ->
+            % If Args is a single element array then we have to
+            % either match if Path is that array or if it contains
+            % the array as an element of an array (which isn't at all
+            % confusing). For Lucene to return us all possible matches
+            % that means we just need to search for each value in
+            % Path.[] and Path.[].[] and rely on our filtering to limit
+            % the results properly.
+            Fields1 = convert(Path, {[{<<"$eq">> , Values}]}),
+            Fields2 = convert([<<"[]">>| Path], {[{<<"$eq">> , Values}]}),
+            {op_or, [Fields1, Fields2]};
+        _ ->
+            % Otherwise the $all operator is equivalent to an $and
+            % operator so we treat it as such.
+            convert(Path, {[{<<"$eq">> , Args}]})
+    end;
+
+% The $elemMatch Lucene query is not an exact translation
+% as we can't enforce that the matches are all for the same
+% item in an array. We just rely on the final selector match
+% to filter out anything that doesn't match. The only trick
+% is that we have to add the `[]` path element since the docs
+% say this has to match against an array.
+convert(Path, {[{<<"$elemMatch">>, Arg}]}) ->
+    convert([<<"[]">> | Path], Arg);
+
+% Our comparison operators are fairly straight forward
+% Range operators with a list, object or null operand cannot be
+% expressed as a Lucene range, so they fall back to a field-exists query.
+convert(Path, {[{<<"$lt">>, Arg}]}) when is_list(Arg); is_tuple(Arg);
+        Arg =:= null ->
+    field_exists_query(Path);
+convert(Path, {[{<<"$lt">>, Arg}]}) ->
+    {op_field, {make_field(Path, Arg), range(lt, Arg)}};
+convert(Path, {[{<<"$lte">>, Arg}]}) when is_list(Arg); is_tuple(Arg);
+        Arg =:= null->
+    field_exists_query(Path);
+convert(Path, {[{<<"$lte">>, Arg}]}) ->
+    {op_field, {make_field(Path, Arg), range(lte, Arg)}};
+%% This is for indexable_fields
+convert(Path, {[{<<"$eq">>, Arg}]}) when Arg =:= null ->
+    {op_null, {make_field(Path, Arg), value_str(Arg)}};
+% Equality against an array: match the array length plus every element
+% under Path.[].
+convert(Path, {[{<<"$eq">>, Args}]}) when is_list(Args) ->
+    Path0 = [<<"[]">> | Path],
+    LPart = {op_field, {make_field(Path0, length), value_str(length(Args))}},
+    Parts0 = [convert(Path0, {[{<<"$eq">>, Arg}]}) || Arg <- Args],
+    Parts = [LPart | Parts0],
+    {op_and, Parts};
+convert(Path, {[{<<"$eq">>, {_} = Arg}]}) ->
+    convert(Path, Arg);
+convert(Path, {[{<<"$eq">>, Arg}]}) ->
+    {op_field, {make_field(Path, Arg), value_str(Arg)}};
+convert(Path, {[{<<"$ne">>, Arg}]}) ->
+    {op_not, {field_exists_query(Path), convert(Path, {[{<<"$eq">>, Arg}]})}};
+convert(Path, {[{<<"$gte">>, Arg}]}) when is_list(Arg); is_tuple(Arg);
+        Arg =:= null ->
+    field_exists_query(Path);
+convert(Path, {[{<<"$gte">>, Arg}]}) ->
+    {op_field, {make_field(Path, Arg), range(gte, Arg)}};
+convert(Path, {[{<<"$gt">>, Arg}]}) when is_list(Arg); is_tuple(Arg);
+        Arg =:= null->
+    field_exists_query(Path);
+convert(Path, {[{<<"$gt">>, Arg}]}) ->
+    {op_field, {make_field(Path, Arg), range(gt, Arg)}};
+
+convert(Path, {[{<<"$in">>, Args}]}) ->
+    {op_or, convert_in(Path, Args)};
+
+convert(Path, {[{<<"$nin">>, Args}]}) ->
+    {op_not, {field_exists_query(Path), convert(Path, {[{<<"$in">>, Args}]})}};
+
+convert(Path, {[{<<"$exists">>, ShouldExist}]}) ->
+    FieldExists = field_exists_query(Path),
+    case ShouldExist of
+        true -> FieldExists;
+        false -> {op_not, {FieldExists, false}}
+    end;
+
+% We're not checking the actual type here, just looking for
+% anything that has a possibility of matching by checking
+% for the field name. We use the same logic for $exists on
+% the actual query.
+convert(Path, {[{<<"$type">>, _}]}) ->
+    field_exists_query(Path);
+
+convert(Path, {[{<<"$mod">>, _}]}) ->
+    field_exists_query(Path, "number");
+
+convert(Path, {[{<<"$regex">>, _}]}) ->
+    field_exists_query(Path, "string");
+
+convert(Path, {[{<<"$size">>, Arg}]}) ->
+    {op_field, {make_field(Path, length), value_str(Arg)}};
+
+% All other operators are internal assertion errors for
+% matching because we either should've removed them during
+% normalization or something else broke.
+convert(_Path, {[{<<"$", _/binary>>=Op, _}]}) ->
+    ?MANGO_ERROR({invalid_operator, Op});
+
+% We've hit a field name specifier. We need to break the name
+% into path parts and continue our conversion.
+convert(Path, {[{Field, Cond}]}) ->
+    NewPathParts = re:split(Field, <<"\\.">>),
+    NewPath = lists:reverse(NewPathParts) ++ Path,
+    convert(NewPath, Cond);
+
+%% For $in
+convert(Path, Val) when is_binary(Val); is_number(Val); is_boolean(Val) ->
+    {op_field, {make_field(Path, Val), value_str(Val)}};
+
+% Anything else is a bad selector.
+convert(_Path, {Props} = Sel) when length(Props) > 1 ->
+    erlang:error({unnormalized_selector, Sel}).
+
+
+%% Render the tuple tree produced by convert/2 as a Lucene query
+%% iolist. Every node is wrapped in parentheses.
+to_query({op_and, Args}) when is_list(Args) ->
+    Clauses = lists:map(fun to_query/1, Args),
+    ["(", join(<<" AND ">>, Clauses), ")"];
+
+to_query({op_or, Args}) when is_list(Args) ->
+    Clauses = lists:map(fun to_query/1, Args),
+    ["(", join(" OR ", Clauses), ")"];
+
+% Negation is expressed as "field exists AND NOT (condition)".
+to_query({op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
+    ["(", to_query(ExistsQuery), " AND NOT (", to_query(Arg), "))"];
+
+%% For $exists:false
+to_query({op_not, {ExistsQuery, false}}) ->
+    ["($fieldnames:/.*/ ", " AND NOT (", to_query(ExistsQuery), "))"];
+
+to_query({op_insert, Arg}) when is_binary(Arg) ->
+    ["(", Arg, ")"];
+
+%% We escape : and / for now for values and all lucene chars for fieldnames
+%% This needs to be resolved.
+%% op_null is rendered exactly like op_field; it is a separate tag only
+%% so that indexable_fields can tell the two apart.
+to_query({Op, {Name, Value}}) when Op =:= op_field; Op =:= op_null ->
+    EscapedName = mango_util:lucene_escape_field(iolist_to_binary(Name)),
+    ["(", EscapedName, ":", Value, ")"];
+
+to_query({op_fieldname, {Name, Wildcard}}) ->
+    EscapedName = mango_util:lucene_escape_field(iolist_to_binary(Name)),
+    ["($fieldnames:", EscapedName, Wildcard, ")"];
+
+to_query({op_default, Value}) ->
+    ["($default:", Value, ")"].
+
+
+%% Interleave Sep between the items of a non-empty list, returning a
+%% flat list [I1, Sep, I2, Sep, ...]. An empty list is not accepted.
+join(Sep, [First | Others]) ->
+    [First | lists:append([[Sep, Item] || Item <- Others])].
+
+
+%% We match on fieldname and fieldname.[]
+convert_in(Path, Args) ->
+    Path0 = [<<"[]">> | Path],
+    lists:map(fun(Arg) ->
+        case Arg of
+            {Object} ->
+                Parts = lists:map(fun (SubObject) ->
+                    Fields1 = convert(Path, {[SubObject]}),
+                    Fields2 = convert(Path0, {[SubObject]}),
+                    {op_or, [Fields1, Fields2]}
+                end, Object),
+                {op_or, Parts};
+            SingleVal ->
+                Fields1 = {op_field, {make_field(Path, SingleVal),
+                value_str(SingleVal)}},
+                Fields2 = {op_field, {make_field(Path0, SingleVal),
+                value_str(SingleVal)}},
+                {op_or, [Fields1, Fields2]}
+        end
+    end, Args).
+
+
+%% Build the (unescaped) field name iolist "path:type". The atom
+%% `length` selects the array length field instead of a value type.
+make_field(Path, length) ->
+    Prefix = path_str(Path),
+    [Prefix, <<":length">>];
+make_field(Path, Arg) ->
+    Prefix = path_str(Path),
+    Suffix = type_str(Arg),
+    [Prefix, <<":">>, Suffix].
+
+
+%% Render a Lucene range for a comparison operator. Square brackets are
+%% inclusive bounds, curly braces exclusive ones.
+range(lt, Arg) ->
+    Bound = value_str(Arg),
+    [<<"[-Infinity TO ">>, Bound, <<"}">>];
+range(lte, Arg) ->
+    Bound = value_str(Arg),
+    [<<"[-Infinity TO ">>, Bound, <<"]">>];
+range(gte, Arg) ->
+    Bound = value_str(Arg),
+    [<<"[">>, Bound, <<" TO Infinity]">>];
+range(gt, Arg) ->
+    Bound = value_str(Arg),
+    [<<"{">>, Bound, <<" TO Infinity]">>].
+
+
+%% Build a query that matches when any field exists at Path, whatever
+%% its type or nesting.
+field_exists_query(Path) ->
+    % Two wildcards are needed, "path:*" and "path.*", so that a path
+    % such as foo.name does not also match foo.name_first (which would
+    % happen if we just appended "*" to the path).
+    Prefix = path_str(Path),
+    TypedField = {op_fieldname, {[Prefix, ":"], "*"}},
+    NestedField = {op_fieldname, {[Prefix, "."], "*"}},
+    {op_or, [TypedField, NestedField]}.
+
+
+%% Build a query that matches when a field of the given Type exists at
+%% Path (used for $mod and $regex).
+%%
+%% The payload must be a {Name, Suffix} pair: both to_query/1 and
+%% indexable_fields/2 only accept {op_fieldname, {_, _}}. Returning a
+%% bare iolist here made every $mod/$regex selector fail with a
+%% function_clause error when the query was rendered.
+field_exists_query(Path, Type) ->
+    {op_fieldname, {[path_str(Path), ":"], Type}}.
+
+
+%% Join a reversed path ([c, b, a]) into the iolist for "a.b.c".
+path_str(Path) ->
+    path_str(Path, []).
+
+
+%% Acc is the tail the joined path is placed in front of.
+path_str([], Acc) ->
+    Acc;
+path_str([Leaf | Ancestors], Acc) ->
+    % Path is stored innermost-first, so prepending each ancestor in
+    % turn yields outermost-first output without an explicit reverse.
+    lists:foldl(fun(Part, Out) ->
+        [Part, <<".">> | Out]
+    end, [Leaf | Acc], Ancestors).
+
+
+%% Name of the index type suffix used for a scalar selector value.
+type_str(null) ->
+    <<"null">>;
+type_str(Value) when is_binary(Value) ->
+    <<"string">>;
+type_str(Value) when is_boolean(Value) ->
+    <<"boolean">>;
+type_str(Value) when is_number(Value) ->
+    <<"number">>.
+
+
+%% Render a scalar selector value as it appears in the Lucene query.
+%% Strings are escaped; null is written as "true" because null fields
+%% are indexed with the value true.
+value_str(Text) when is_binary(Text) ->
+    mango_util:lucene_escape_query_value(Text);
+value_str(Int) when is_integer(Int) ->
+    list_to_binary(integer_to_list(Int));
+value_str(Float) when is_float(Float) ->
+    list_to_binary(float_to_list(Float));
+value_str(Flag) when Flag =:= true; Flag =:= null ->
+    <<"true">>;
+value_str(false) ->
+    <<"false">>.
+
+
+%% Turn a sort field into the "escapedname<type>" form. When the
+%% escaped name already ends in the escaped ":string" or ":number"
+%% suffix only the <type> marker is appended; otherwise the type is
+%% derived from the selector via get_sort_type/2.
+append_sort_type(RawSortField, Selector) ->
+    EncodeField = mango_util:lucene_escape_field(RawSortField),
+    IsString = mango_util:has_suffix(EncodeField, <<"_3astring">>),
+    IsNumber = mango_util:has_suffix(EncodeField, <<"_3anumber">>),
+    Suffix = if
+        IsString -> <<"<string>">>;
+        IsNumber -> <<"<number>">>;
+        true -> get_sort_type(RawSortField, Selector)
+    end,
+    <<EncodeField/binary, Suffix/binary>>.
+
+
+%% Derive the sort suffix for Field from the selector. The field must
+%% be used with exactly one kind of operand (all strings or all
+%% numbers); anything else is a text_sort_error.
+get_sort_type(Field, Selector) ->
+    case lists:usort(get_sort_types(Field, Selector, [])) of
+        [str] ->
+            <<"_3astring<string>">>;
+        [num] ->
+            <<"_3anumber<number>">>;
+        _ ->
+            ?MANGO_ERROR({text_sort_error, Field})
+    end.
+
+
+% Walk Selector and collect a type tag for every operator condition
+% placed directly on Field: str when the operand is a binary, num when
+% it is a number. The tags are pushed onto Acc.
+get_sort_types(Field, {[{Field, {[{<<"$", _/binary>>, Cond}]}}]}, Acc)
+        when is_binary(Cond) ->
+    [str | Acc];
+
+get_sort_types(Field, {[{Field, {[{<<"$", _/binary>>, Cond}]}}]}, Acc)
+        when is_number(Cond) ->
+    [num | Acc];
+
+% A list operand (e.g. the arguments of a combination operator) is
+% folded over, accumulating tags from each element.
+get_sort_types(Field, {[{_, Cond}]}, Acc) when is_list(Cond) ->
+    lists:foldl(fun(Arg, InnerAcc) ->
+        get_sort_types(Field, Arg, InnerAcc)
+    end, Acc, Cond);
+
+% A tuple operand is descended into. This clause is also reached when
+% the key is Field but the operand of its operator is neither a binary
+% nor a number.
+get_sort_types(Field, {[{_, Cond}]}, Acc)  when is_tuple(Cond)->
+    get_sort_types(Field, Cond, Acc);
+
+% Anything else contributes no type information.
+get_sort_types(_Field, _, Acc)  ->
+    Acc.

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/src/mango_util.erl
----------------------------------------------------------------------
diff --git a/src/mango_util.erl b/src/mango_util.erl
index b0767dc..f3b60b2 100644
--- a/src/mango_util.erl
+++ b/src/mango_util.erl
@@ -29,7 +29,12 @@
     dec_dbname/1,
 
     enc_hex/1,
-    dec_hex/1
+    dec_hex/1,
+
+    lucene_escape_field/1,
+    lucene_escape_query_value/1,
+
+    has_suffix/2
 ]).
 
 
@@ -227,3 +232,64 @@ dec_hex_byte(N) when N >= $A, N =< $F -> (N - $A) + 10;
 dec_hex_byte(N) -> throw({invalid_hex_character, N}).
 
 
+
+%% Escape a field name for use in the Lucene index: ASCII letters and
+%% digits are kept, every other byte becomes "_" followed by its two
+%% hex digits (as produced by enc_hex_byte/1). A binary argument
+%% yields a binary; a list argument yields a list.
+lucene_escape_field(Bin) when is_binary(Bin) ->
+    iolist_to_binary(lucene_escape_field(binary_to_list(Bin)));
+lucene_escape_field([]) ->
+    [];
+lucene_escape_field([H | T]) when is_number(H), H >= 0, H =< 255 ->
+    IsAlnum =
+        (H >= $a andalso $z >= H) orelse
+        (H >= $A andalso $Z >= H) orelse
+        (H >= $0 andalso $9 >= H),
+    case IsAlnum of
+        true ->
+            [H | lucene_escape_field(T)];
+        false ->
+            Hi = enc_hex_byte(H div 16),
+            Lo = enc_hex_byte(H rem 16),
+            [$_, Hi, Lo | lucene_escape_field(T)]
+    end.
+
+
+%% Escape a query value (binary or iolist) for the Lucene query
+%% parser; the result is always a binary.
+lucene_escape_query_value(IoList) when is_list(IoList) ->
+    lucene_escape_query_value(iolist_to_binary(IoList));
+lucene_escape_query_value(Bin) when is_binary(Bin) ->
+    iolist_to_binary(lucene_escape_qv(Bin)).
+
+
+% Backslash-escape the special Lucene query characters listed below,
+% plus whitespace (space, tab, CR, LF).
+%
+%   + - && || ! ( ) { } [ ] ^ ~ * ? : \ " /
+%
+% "&&" and "||" are escaped as a pair with a single backslash; a lone
+% "&" or "|" is left untouched.
+
+lucene_escape_qv(<<>>) ->
+    [];
+lucene_escape_qv(<<"&&", Rest/binary>>) ->
+    ["\\&&" | lucene_escape_qv(Rest)];
+lucene_escape_qv(<<"||", Rest/binary>>) ->
+    ["\\||" | lucene_escape_qv(Rest)];
+lucene_escape_qv(<<C, Rest/binary>>) ->
+    case lists:member(C, "+-(){}[]!^~*?:/\\\" \t\r\n") of
+        true -> ["\\", C | lucene_escape_qv(Rest)];
+        false -> [C | lucene_escape_qv(Rest)]
+    end.
+
+
+%% True when the binary Bin ends with the binary Suffix. The empty
+%% suffix matches every binary.
+has_suffix(Bin, Suffix) when is_binary(Bin), is_binary(Suffix) ->
+    BinLen = byte_size(Bin),
+    SuffixLen = byte_size(Suffix),
+    % Only look at the tail when Bin is long enough to contain Suffix.
+    BinLen >= SuffixLen andalso
+        binary:part(Bin, BinLen - SuffixLen, SuffixLen) =:= Suffix.

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/02-basic-find-test.py
----------------------------------------------------------------------
diff --git a/test/02-basic-find-test.py b/test/02-basic-find-test.py
index 4e3fc29..8113b21 100644
--- a/test/02-basic-find-test.py
+++ b/test/02-basic-find-test.py
@@ -1,3 +1,4 @@
+# -*- coding: latin-1 -*-
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 # use this file except in compliance with the License. You may obtain a copy of
 # the License at
@@ -10,7 +11,6 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-# -*- coding: latin-1 -*-
 
 import mango
 

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/04-key-tests.py
----------------------------------------------------------------------
diff --git a/test/04-key-tests.py b/test/04-key-tests.py
index 4a5e904..c673cf2 100644
--- a/test/04-key-tests.py
+++ b/test/04-key-tests.py
@@ -1,4 +1,16 @@
-# -*- coding: utf-8 -*-
+# -*- coding: latin-1 -*-
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
 
 import mango
 
@@ -28,6 +40,13 @@ TEST_DOCS = [
         "type": "complex_key",
         "title": "unicode key",
         "": "apple"
+    },
+    {
+        "title": "internal_fields_format",
+        "utf8-1[]:string" : "string",
+        "utf8-2[]:boolean[]" : True,
+        "utf8-3[]:number" : 9,
+        "utf8-3[]:null" : None
     }
 ]
 
@@ -37,29 +56,68 @@ class KeyTests(mango.DbPerClass):
     def setUpClass(klass):
         super(KeyTests, klass).setUpClass()
         klass.db.save_docs(TEST_DOCS, w=3)
-        klass.db.create_index(["type"])
+        klass.db.create_index(["type"], ddoc="view")
+        klass.db.create_text_index(ddoc="text")
+
+    def run_check(self, query, check, fields=None, indexes=None):
+        if indexes is None:
+            indexes = ["view", "text"]
+        for idx in indexes:
+            docs = self.db.find(query, fields=fields, use_index=idx)
+            check(docs)
 
     def test_dot_key(self):
+        query = {"type": "complex_key"}
         fields = ["title", "dot\\.key", "none.dot"]
-        docs = self.db.find({"type": "complex_key"}, fields = fields)
-        assert len(docs) == 4
-        assert docs[1].has_key("dot.key")
-        assert docs[1]["dot.key"] == "dot's value"
-        assert docs[1].has_key("none")
-        assert docs[1]["none"]["dot"] == "none dot's value"
+        def check(docs):
+            assert len(docs) == 4
+            assert docs[1].has_key("dot.key")
+            assert docs[1]["dot.key"] == "dot's value"
+            assert docs[1].has_key("none")
+            assert docs[1]["none"]["dot"] == "none dot's value"
+        self.run_check(query, check, fields=fields)
 
     def test_peso_key(self):
+        query = {"type": "complex_key"}
         fields = ["title", "$key", "deep.$key"]
-        docs = self.db.find({"type": "complex_key"}, fields = fields)
-        assert len(docs) == 4
-        assert docs[2].has_key("$key")
-        assert docs[2]["$key"] == "peso"
-        assert docs[2].has_key("deep")
-        assert docs[2]["deep"]["$key"] == "deep peso"
+        def check(docs):
+            assert len(docs) == 4
+            assert docs[2].has_key("$key")
+            assert docs[2]["$key"] == "peso"
+            assert docs[2].has_key("deep")
+            assert docs[2]["deep"]["$key"] == "deep peso"
+        self.run_check(query, check, fields=fields)
+
+    def test_unicode_in_fieldname(self):
+        query = {"type": "complex_key"}
+        fields = ["title", ""]
+        def check(docs):
+            assert len(docs) == 4
+            # note:  == \uf8ff
+            assert docs[3].has_key(u'\uf8ff')
+            assert docs[3][u'\uf8ff'] == "apple"
+        self.run_check(query, check, fields=fields)
+
+    # The rest of these tests are only run against the text
+    # indexes because view indexes don't have to worry about
+    # field *name* escaping in the index.
+
+    def test_unicode_in_selector_field(self):
+        query = {"" : "apple"}
+        def check(docs):
+            assert len(docs) == 1
+            assert docs[0][u"\uf8ff"] == "apple"
+        self.run_check(query, check, indexes=["text"])
 
-    def test_unicode_key(self):
-        docs = self.db.find({"type": "complex_key"}, fields = ["title", ""])
-        assert len(docs) == 4
-        # note:  == \uf8ff
-        assert docs[3].has_key(u'\uf8ff')
-        assert docs[3][u'\uf8ff'] == "apple"
+    def test_internal_field_tests(self):
+        queries = [
+            {"utf8-1[]:string" : "string"},
+            {"utf8-2[]:boolean[]" : True},
+            {"utf8-3[]:number" : 9},
+            {"utf8-3[]:null" : None}
+        ]
+        def check(docs):
+            assert len(docs) == 1
+            assert docs[0]["title"] == "internal_fields_format"
+        for query in queries:
+            self.run_check(query, check, indexes=["text"])

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/05-index-selection-test.py
----------------------------------------------------------------------
diff --git a/test/05-index-selection-test.py b/test/05-index-selection-test.py
index 5aa86c6..8c2c018 100644
--- a/test/05-index-selection-test.py
+++ b/test/05-index-selection-test.py
@@ -1,9 +1,24 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
 
 import mango
 import user_docs
 
 
 class IndexSelectionTests(mango.UserDocsTests):
+    @classmethod
+    def setUpClass(klass):
+        # Run the parent fixture first, then add text indexes to the same
+        # database so index selection has both kinds to choose from.
+        # NOTE(review): the {} argument presumably means "no extra index
+        # options" - confirm against user_docs.add_text_indexes.
+        super(IndexSelectionTests, klass).setUpClass()
+        user_docs.add_text_indexes(klass.db, {})
 
     def test_basic(self):
         resp = self.db.find({"name.last": "A last name"}, explain=True)
@@ -16,6 +31,19 @@ class IndexSelectionTests(mango.UserDocsTests):
             }, explain=True)
         assert resp["index"]["type"] == "json"
 
+    def test_no_view_index(self):
+        resp = self.db.find({"name.first": "Ohai!"}, explain=True)
+        assert resp["index"]["type"] == "text"
+
+    def test_with_or(self):
+        resp = self.db.find({
+                "$or": [
+                    {"name.first": "Stephanie"},
+                    {"name.last": "This doesn't have to match anything."}
+                ]
+            }, explain=True)
+        assert resp["index"]["type"] == "text"
+
     def test_use_most_columns(self):
         # ddoc id for the age index
         ddocid = "_design/ad3d537c03cd7c6a43cf8dff66ef70ea54c2b40f"
@@ -32,3 +60,25 @@ class IndexSelectionTests(mango.UserDocsTests):
                 "age": {"$gt": 1}
             }, use_index=ddocid, explain=True)
         assert resp["index"]["ddoc"] == ddocid
+
+
+class MultiTextIndexSelectionTests(mango.UserDocsTests):
+    @classmethod
+    def setUpClass(klass):
+        super(MultiTextIndexSelectionTests, klass).setUpClass()
+        klass.db.create_text_index(ddoc="foo", analyzer="keyword")
+        klass.db.create_text_index(ddoc="bar", analyzer="email")
+
+    def test_view_ok_with_multi_text(self):
+        resp = self.db.find({"name.last": "A last name"}, explain=True)
+        assert resp["index"]["type"] == "json"
+
+    def test_multi_text_index_is_error(self):
+        try:
+            self.db.find({"$text": "a query"}, explain=True)
+        except Exception, e:
+            assert e.response.status_code == 400
+
+    def test_use_index_works(self):
+        resp = self.db.find({"$text": "a query"}, use_index="foo", explain=True)
+        assert resp["index"]["ddoc"] == "_design/foo"

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/06-basic-text-test.py
----------------------------------------------------------------------
diff --git a/test/06-basic-text-test.py b/test/06-basic-text-test.py
new file mode 100644
index 0000000..71eeb70
--- /dev/null
+++ b/test/06-basic-text-test.py
@@ -0,0 +1,488 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import mango
+import user_docs
+
+
+class BasicTextTests(mango.UserDocsTextTests):
+    def test_simple(self):
+        docs = self.db.find({"$text": "Stephanie"})
+        assert len(docs) == 1
+        assert docs[0]["name"]["first"] == "Stephanie"
+
+    def test_with_integer(self):
+        docs = self.db.find({"name.first": "Stephanie", "age": 48})
+        assert len(docs) == 1
+        assert docs[0]["name"]["first"] == "Stephanie"
+        assert docs[0]["age"] == 48
+
+    def test_with_boolean(self):
+        docs = self.db.find({"name.first": "Stephanie", "manager": False})
+        assert len(docs) == 1
+        assert docs[0]["name"]["first"] == "Stephanie"
+        assert docs[0]["manager"] == False
+
+    def test_with_array(self):
+        faves = ["Ruby", "C", "Python"]
+        docs = self.db.find({"name.first": "Stephanie", "favorites": faves})
+        assert docs[0]["name"]["first"] == "Stephanie"
+        assert docs[0]["favorites"] == faves
+
+    def test_lt(self):
+        docs = self.db.find({"age": {"$lt": 22}})
+        assert len(docs) == 0
+
+        docs = self.db.find({"age": {"$lt": 23}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": {"$lt": 33}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (1, 9)
+
+        docs = self.db.find({"age": {"$lt": 34}})
+        assert len(docs) == 3
+        for d in docs:
+            assert d["user_id"] in (1, 7, 9)
+
+    def test_lte(self):
+        docs = self.db.find({"age": {"$lte": 21}})
+        assert len(docs) == 0
+
+        docs = self.db.find({"age": {"$lte": 22}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": {"$lte": 33}})
+        assert len(docs) == 3
+        for d in docs:
+            assert d["user_id"] in (1, 7, 9)
+
+    def test_eq(self):
+        docs = self.db.find({"age": 21})
+        assert len(docs) == 0
+
+        docs = self.db.find({"age": 22})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": {"$eq": 22}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": 33})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 7
+
+    def test_ne(self):
+        docs = self.db.find({"age": {"$ne": 22}})
+        assert len(docs) == len(user_docs.DOCS) - 1
+        for d in docs:
+            assert d["age"] != 22
+
+        docs = self.db.find({"$not": {"age": 22}})
+        assert len(docs) == len(user_docs.DOCS) - 1
+        for d in docs:
+            assert d["age"] != 22
+
+    def test_gt(self):
+        docs = self.db.find({"age": {"$gt": 77}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (3, 13)
+
+        docs = self.db.find({"age": {"$gt": 78}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 3
+
+        docs = self.db.find({"age": {"$gt": 79}})
+        assert len(docs) == 0
+
+    def test_gte(self):
+        docs = self.db.find({"age": {"$gte": 77}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (3, 13)
+
+        docs = self.db.find({"age": {"$gte": 78}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (3, 13)
+
+        docs = self.db.find({"age": {"$gte": 79}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 3
+
+        docs = self.db.find({"age": {"$gte": 80}})
+        assert len(docs) == 0
+
+    def test_and(self):
+        docs = self.db.find({"age": 22, "manager": True})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": 22, "manager": False})
+        assert len(docs) == 0
+
+        docs = self.db.find({"$and": [{"age": 22}, {"manager": True}]})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"$and": [{"age": 22}, {"manager": False}]})
+        assert len(docs) == 0
+
+        docs = self.db.find({"$text": "Ramona", "age": 22})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"$and": [{"$text": "Ramona"}, {"age": 22}]})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"$and": [{"$text": "Ramona"}, {"$text": "Floyd"}]})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+    def test_or(self):
+        docs = self.db.find({"$or": [{"age": 22}, {"age": 33}]})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (7, 9)
+
+        q = {"$or": [{"$text": "Ramona"}, {"$text": "Stephanie"}]}
+        docs = self.db.find(q)
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (0, 9)
+
+        q = {"$or": [{"$text": "Ramona"}, {"age": 22}]}
+        docs = self.db.find(q)
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+    def test_and_or(self):
+        q = {
+            "age": 22,
+            "$or": [
+                {"manager": False},
+                {"location.state": "Missouri"}
+            ]
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        q = {
+            "$or": [
+                {"age": 22},
+                {"age": 43, "manager": True}
+            ]
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (9, 10)
+
+        q = {
+            "$or": [
+                {"$text": "Ramona"},
+                {"age": 43, "manager": True}
+            ]
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (9, 10)
+
+    def test_nor(self):
+        docs = self.db.find({"$nor": [{"age": 22}, {"age": 33}]})
+        assert len(docs) == 13
+        for d in docs:
+            assert d["user_id"] not in (7, 9)
+
+    def test_in_with_value(self):
+        docs = self.db.find({"age": {"$in": [1, 5]}})
+        assert len(docs) == 0
+
+        docs = self.db.find({"age": {"$in": [1, 5, 22]}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": {"$in": [1, 5, 22, 31]}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (1, 9)
+
+        docs = self.db.find({"age": {"$in": [22, 31]}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (1, 9)
+
+        # Limits on boolean clauses?
+        docs = self.db.find({"age": {"$in": range(1000)}})
+        assert len(docs) == 15
+
+    def test_in_with_array(self):
+        vals = ["Random Garbage", 52, {"Versions": {"Alpha": "Beta"}}]
+        docs = self.db.find({"favorites": {"$in": vals}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 1
+
+        vals = ["Lisp", "Python"]
+        docs = self.db.find({"favorites": {"$in": vals}})
+        assert len(docs) == 10
+
+        vals = [{"val1": 1, "val2": "val2"}]
+        docs = self.db.find({"test_in": {"$in": vals}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 2
+
+    def test_nin_with_value(self):
+        docs = self.db.find({"age": {"$nin": [1, 5]}})
+        assert len(docs) == len(user_docs.DOCS)
+
+        docs = self.db.find({"age": {"$nin": [1, 5, 22]}})
+        assert len(docs) == len(user_docs.DOCS) - 1
+        for d in docs:
+            assert d["user_id"] != 9
+
+        docs = self.db.find({"age": {"$nin": [1, 5, 22, 31]}})
+        assert len(docs) == len(user_docs.DOCS) - 2
+        for d in docs:
+            assert d["user_id"] not in (1, 9)
+
+        docs = self.db.find({"age": {"$nin": [22, 31]}})
+        assert len(docs) == len(user_docs.DOCS) - 2
+        for d in docs:
+            assert d["user_id"] not in (1, 9)
+
+        # Limits on boolean clauses?
+        docs = self.db.find({"age": {"$nin": range(1000)}})
+        assert len(docs) == 0
+
+    def test_nin_with_array(self):
+        vals = ["Random Garbage", 52, {"Versions": {"Alpha": "Beta"}}]
+        docs = self.db.find({"favorites": {"$nin": vals}})
+        assert len(docs) == len(user_docs.DOCS) - 1
+        for d in docs:
+            assert d["user_id"] != 1
+
+        vals = ["Lisp", "Python"]
+        docs = self.db.find({"favorites": {"$nin": vals}})
+        assert len(docs) == 5
+
+        vals = [{"val1": 1, "val2": "val2"}]
+        docs = self.db.find({"test_in": {"$nin": vals}})
+        assert len(docs) == 0
+
+    def test_all(self):
+        vals = ["Ruby", "C", "Python", {"Versions": {"Alpha": "Beta"}}]
+        docs = self.db.find({"favorites": {"$all": vals}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 1
+
+        # This matches where favorites either contains
+        # the nested array, or is the nested array. This is
+        # notably different than the non-nested array in that
+        # it does not match a re-ordered version of the array.
+        # The fact that user_id 14 isn't included demonstrates
+        # this behavior.
+        vals = [["Lisp", "Erlang", "Python"]]
+        docs = self.db.find({"favorites": {"$all": vals}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (3, 9)
+
+    def test_exists_field(self):
+        docs = self.db.find({"exists_field": {"$exists": True}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (7, 8)
+
+        docs = self.db.find({"exists_field": {"$exists": False}})
+        assert len(docs) == len(user_docs.DOCS) - 2
+        for d in docs:
+            assert d["user_id"] not in (7, 8)
+
+    def test_exists_array(self):
+        docs = self.db.find({"exists_array": {"$exists": True}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (9, 10)
+
+        docs = self.db.find({"exists_array": {"$exists": False}})
+        assert len(docs) == len(user_docs.DOCS) - 2
+        for d in docs:
+            assert d["user_id"] not in (9, 10)
+
+    def test_exists_object(self):
+        docs = self.db.find({"exists_object": {"$exists": True}})
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (11, 12)
+
+        docs = self.db.find({"exists_object": {"$exists": False}})
+        assert len(docs) == len(user_docs.DOCS) - 2
+        for d in docs:
+            assert d["user_id"] not in (11, 12)
+
+    def test_exists_object_member(self):
+        docs = self.db.find({"exists_object.should": {"$exists": True}})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 11
+
+        docs = self.db.find({"exists_object.should": {"$exists": False}})
+        assert len(docs) == len(user_docs.DOCS) - 1
+        for d in docs:
+            assert d["user_id"] != 11
+
+    def test_exists_and(self):
+        q = {"$and": [
+            {"manager": {"$exists": True}},
+            {"exists_object.should": {"$exists": True}}
+        ]}
+        docs = self.db.find(q)
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 11
+
+        q = {"$and": [
+            {"manager": {"$exists": False}},
+            {"exists_object.should": {"$exists": True}}
+        ]}
+        docs = self.db.find(q)
+        assert len(docs) == 0
+
+        # Translates to manager exists or exists_object.should doesn't
+        # exist, which will match all docs
+        q = {"$not": q}
+        docs = self.db.find(q)
+        assert len(docs) == len(user_docs.DOCS)
+
+    def test_value_chars(self):
+        q = {"complex_field_value": "+-(){}[]^~&&*||\"\\/?:!"}
+        docs = self.db.find(q)
+        assert len(docs) == 1
+
+    # test lucene syntax in $text
+
+
+class ElemMatchTests(mango.FriendDocsTextTests):
+    def test_elem_match(self):
+        q = {"friends": {
+                "$elemMatch":
+                    {"name.first": "Vargas"}
+            }
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (0, 1)
+
+        q = {
+            "friends": {
+                "$elemMatch": {
+                    "name.first": "Ochoa",
+                    "name.last": "Burch"
+                }
+            }
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 4
+
+
+        # Check that we can do logic in elemMatch
+        q = {
+            "friends": {"$elemMatch": {
+                "name.first": "Ochoa", "type": "work"
+            }}
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 1
+
+        q = {
+            "friends": {
+                "$elemMatch": {
+                    "name.first": "Ochoa",
+                    "$or": [
+                        {"type": "work"},
+                        {"type": "personal"}
+                    ]
+                }
+            }
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (1, 4)
+
+        # Same as last, but using $in
+        q = {
+            "friends": {
+                "$elemMatch": {
+                    "name.first": "Ochoa",
+                    "type": {"$in": ["work", "personal"]}
+                }
+            }
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 2
+        for d in docs:
+            assert d["user_id"] in (1, 4)
+
+        q = {
+            "$and": [{
+                "friends": {
+                    "$elemMatch": {
+                        "id": 0,
+                        "name": {
+                            "$exists": True
+                            }
+                        }
+                    }
+                },
+                {
+                "friends": {
+                    "$elemMatch": {
+                        "$or": [
+                            {
+                            "name": {
+                                "first": "Campos",
+                                "last": "Freeman"
+                                }
+                            },
+                            {
+                            "name": {
+                                "$in": [{
+                                    "first": "Gibbs",
+                                    "last": "Mccarty"
+                                    },
+                                    {
+                                    "first": "Wilkins",
+                                    "last": "Chang"
+                                     }
+                                    ]
+                                    }
+                                }
+                            ]
+                        }
+                    }
+                }
+            ]
+        }
+        docs = self.db.find(q)
+        assert len(docs) == 3
+        for d in docs:
+            assert d["user_id"] in (10, 11,12)

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/06-text-default-field-test.py
----------------------------------------------------------------------
diff --git a/test/06-text-default-field-test.py b/test/06-text-default-field-test.py
new file mode 100644
index 0000000..691a885
--- /dev/null
+++ b/test/06-text-default-field-test.py
@@ -0,0 +1,70 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import mango
+
+
+
+class NoDefaultFieldTest(mango.UserDocsTextTests):
+
+    DEFAULT_FIELD = False
+
+    def test_basic(self):
+        docs = self.db.find({"$text": "Ramona"})
+        # Or should this throw an error?
+        assert len(docs) == 0
+
+    def test_other_fields_exist(self):
+        docs = self.db.find({"age": 22})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+
+class NoDefaultFieldWithAnalyzer(mango.UserDocsTextTests):
+
+    DEFAULT_FIELD = {
+        "enabled": False,
+        "analyzer": "keyword"
+    }
+
+    def test_basic(self):
+        docs = self.db.find({"$text": "Ramona"})
+        assert len(docs) == 0
+
+    def test_other_fields_exist(self):
+        docs = self.db.find({"age": 22})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+
+class DefaultFieldWithCustomAnalyzer(mango.UserDocsTextTests):
+
+    DEFAULT_FIELD = {
+        "enabled": True,
+        "analyzer": "keyword"
+    }
+
+    def test_basic(self):
+        docs = self.db.find({"$text": "Ramona"})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+    def test_not_analyzed(self):
+        docs = self.db.find({"$text": "Lott Place"})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"$text": "Lott"})
+        assert len(docs) == 0
+
+        docs = self.db.find({"$text": "Place"})
+        assert len(docs) == 0

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/07-text-custom-field-list-test.py
----------------------------------------------------------------------
diff --git a/test/07-text-custom-field-list-test.py b/test/07-text-custom-field-list-test.py
new file mode 100644
index 0000000..5e5f7cc
--- /dev/null
+++ b/test/07-text-custom-field-list-test.py
@@ -0,0 +1,62 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import mango
+
+
+
+class CustomFieldsTest(mango.UserDocsTextTests):
+
+    FIELDS = [
+        {"name": "favorites.[]", "type": "string"},
+        {"name": "manager", "type": "boolean"},
+        {"name": "age", "type": "number"},
+        # These two are to test the default analyzer for
+        # each field.
+        {"name": "location.state", "type": "string"},
+        {
+            "name": "location.address.street",
+            "type": "string"
+        }
+    ]
+
+    def test_basic(self):
+        docs = self.db.find({"age": 22})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+    def test_multi_field(self):
+        docs = self.db.find({"age": 22, "manager": True})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 9
+
+        docs = self.db.find({"age": 22, "manager": False})
+        assert len(docs) == 0
+
+    def test_missing(self):
+        self.db.find({"location.state": "Nevada"})
+
+    def test_missing_type(self):
+        # "age" is indexed as a number, so a string value should raise.
+        try:
+            self.db.find({"age": "foo"})
+        except Exception:
+            return
+        raise Exception("Should have thrown an HTTPError")
+
+    def test_field_analyzer_is_keyword(self):
+        docs = self.db.find({"location.state": "New"})
+        assert len(docs) == 0
+
+        docs = self.db.find({"location.state": "New Hampshire"})
+        assert len(docs) == 1
+        assert docs[0]["user_id"] == 10

http://git-wip-us.apache.org/repos/asf/couchdb-mango/blob/aa4edf42/test/08-text-limit-test.py
----------------------------------------------------------------------
diff --git a/test/08-text-limit-test.py b/test/08-text-limit-test.py
new file mode 100644
index 0000000..72c87b5
--- /dev/null
+++ b/test/08-text-limit-test.py
@@ -0,0 +1,134 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+import mango
+import limit_docs
+
+class LimitTests(mango.LimitDocsTextTests):
+
+    def test_limit_field(self):
+        q = {"$or": [{"user_id" : {"$lt" : 10}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=10)
+        assert len(docs) == 8
+        for d in docs:
+            assert d["user_id"] < 10
+
+    def test_limit_field2(self):
+        q = {"$or": [{"user_id" : {"$lt" : 20}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=10)
+        assert len(docs) == 10
+        for d in docs:
+            assert d["user_id"] < 20
+
+    def test_limit_field3(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=1)
+        assert len(docs) == 1
+        for d in docs:
+            assert d["user_id"] < 100
+
+    def test_limit_field4(self):
+        q = {"$or": [{"user_id" : {"$lt" : 0}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=35)
+        assert len(docs) == 0
+
+    # We reach our cap here of 50
+    def test_limit_field5(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=55)
+        assert len(docs) == 50
+        for d in docs:
+            assert d["user_id"] < 100
+
+    def test_limit_skip_field1(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=10, skip=20)
+        assert len(docs) == 10
+        for d in docs:
+            assert d["user_id"] > 20
+
+    def test_limit_skip_field2(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=100, skip=100)
+        assert len(docs) == 0
+
+    def test_limit_skip_field3(self):
+        q = {"$or": [{"user_id" : {"$lt" : 20}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=1, skip=30)
+        assert len(docs) == 0
+
+    def test_limit_skip_field4(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        docs = self.db.find(q, limit=0, skip=0)
+        assert len(docs) == 0
+
+    def test_limit_skip_field5(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        try:
+            self.db.find(q, limit=-1)
+        except Exception, e:
+            assert e.response.status_code == 400
+        else:
+            raise AssertionError("Should have thrown error for negative limit")
+
+    def test_limit_skip_field6(self):
+        q = {"$or": [{"user_id" : {"$lt" : 100}}, {"filtered_array.[]": 1}]}
+        try:
+            self.db.find(q, skip=-1)
+        except Exception, e:
+            assert e.response.status_code == 400
+        else:
+            raise AssertionError("Should have thrown error for negative skip")
+
+    # Basic test to ensure we can iterate through documents with a bookmark
+    def test_limit_bookmark(self):
+        for i in range(1, len(limit_docs.DOCS), 5):
+            self.run_bookmark_check(i)
+
+        for i in range(1, len(limit_docs.DOCS), 5):
+            self.run_bookmark_sort_check(i)
+
+
+    def run_bookmark_check(self, size):
+        # Page through all matches `size` docs at a time; no _id may repeat.
+        q = {"age": {"$gt": 0}}
+        seen_docs = set()
+        bm = None
+        while True:
+            json = self.db.find(q, limit=size, bookmark=bm, return_raw=True)
+            for doc in json["docs"]:
+                assert doc["_id"] not in seen_docs
+                seen_docs.add(doc["_id"])
+            if not len(json["docs"]):
+                break
+            assert json["bookmark"] != bm
+            bm = json["bookmark"]
+        assert len(seen_docs) == len(limit_docs.DOCS)
+
+    def run_bookmark_sort_check(self, size):
+        q = {"age": {"$gt": 0}}
+        seen_docs = set()
+        bm = None
+        age = 0
+        while True:
+            json = self.db.find(q, limit=size, bookmark=bm, sort=["age"],
+                return_raw=True)
+            for doc in json["docs"]:
+                assert doc["_id"] not in seen_docs
+                assert doc["age"] >= age
+                age = doc["age"]
+                seen_docs.add(doc["_id"])
+            if not len(json["docs"]):
+                break
+            assert json["bookmark"] != bm
+            bm = json["bookmark"]
+        assert len(seen_docs) == len(limit_docs.DOCS)


Mime
View raw message