incubator-couchdb-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Mike Leddy <m...@loop.com.br>
Subject Re: Having purge problems
Date Thu, 06 May 2010 15:26:20 GMT
Just for the record, there was a problem with the patch that affects
resuming a compaction that failed for whatever reason.


$ cat compact_deleted.patch
--- couchdb-0.11.0/src/couchdb/couch_db.erl	2010-03-04 02:17:44.000000000 -0300
+++ couchdb-0.11.0.new/src/couchdb/couch_db.erl	2010-05-04 17:18:54.000000000 -0300
@@ -323,7 +323,7 @@
 get_revs_limit(#db{revs_limit=Limit}) ->
     Limit.
 
-set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit > 0 ->
+set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit >= 0 ->
     check_is_admin(Db),
     gen_server:call(Pid, {set_revs_limit, Limit}, infinity);
 set_revs_limit(_Db, _Limit) ->
--- couchdb-0.11.0/src/couchdb/couch_key_tree.erl	2009-11-21 10:43:43.000000000 -0300
+++ couchdb-0.11.0.new/src/couchdb/couch_key_tree.erl	2010-05-04 17:40:57.000000000 -0300
@@ -314,7 +314,7 @@
     % flatten each branch in a tree into a tree path
     Paths = get_all_leafs_full(Trees),
 
-    Paths2 = [{Pos, lists:sublist(Path, Limit)} || {Pos, Path} <- Paths],
+    Paths2 = [{Pos, lists:sublist(Path, lists:max([Limit, 1]))} || {Pos, Path} <- Paths],
 
     % convert paths back to trees
     lists:foldl(
--- couchdb-0.11.0/src/couchdb/couch_db_updater.erl	2010-02-22 12:20:53.000000000 -0300
+++ couchdb-0.11.0.new/src/couchdb/couch_db_updater.erl	2010-05-05 09:19:50.000000000 -0300
@@ -736,9 +736,16 @@
         end, Tree).
             
 
-copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
-    Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
-    LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids),
+copy_docs(#db{revs_limit=Limit}=Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
+    if Limit > 0 ->
+      Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
+      LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids);
+    true ->
+      AllIds = [Id || #doc_info{id=Id} <- InfoBySeq],
+      BaseResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, AllIds),
+      LookupResults = [Filtered || {ok, #full_doc_info{deleted=false}}=Filtered <- BaseResults],
+      Ids = [Id || {ok, #full_doc_info{id=Id}} <- LookupResults]
+    end,
 
     % write out the attachments
     NewFullDocInfos0 = lists:map(

       
Regards,

Mike    
 
-copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
-    Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
-    LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids),
+copy_docs(#db{revs_limit=Limit}=Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
+    if Limit > 0 ->
+      Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
+      LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids);
+    true ->
+      AllIds = [Id || #doc_info{id=Id} <- InfoBySeq],
+      BaseResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, AllIds),
+      LookupResults = [Filtered || {ok, #full_doc_info{deleted=false}}=Filtered <- BaseResults],
+      Ids = [Id || {ok, #full_doc_info{id=Id}} <- LookupResults]
+    end,
 
     % write out the attachments
     NewFullDocInfos0 = lists:map(


On Wed, 2010-05-05 at 10:42 -0300, Mike Leddy wrote:
> Hi,
> 
> You are welcome.
> 
> I had imagined trying some sort of database swapping but I never liked 
> the idea of switching where the app looks to the shadow/replacement and
> doing it correctly while always available and replicating.
> 
> I decided to bite the bullet and went for the my desired solution. It 
> may be completely against the grain of good practice but I decided to 
> patch couchdb to be able to purge deleted data on compaction.
> 
> Please note I am an Erlang/Couchdb newbie.
> 
> First it seemed reasonable to use revs_limit to trigger deleted document
> removal as I also want to clear out older revisions as well. I decided
> that a revs_limit = 0 might be appropriate.
> 
> Patching couch_db.erl to allow revs_limit = 0 gave me:
> 
> --- couchdb-0.11.0/src/couchdb/couch_db.erl     2010-03-04 02:17:44.000000000 -0300
> +++ couchdb-0.11.0.new/src/couchdb/couch_db.erl 2010-05-04 17:18:54.000000000 -0300
> @@ -323,7 +323,7 @@
>  get_revs_limit(#db{revs_limit=Limit}) ->
>      Limit.
>  
> -set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit > 0 ->
> +set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit >= 0 ->
>      check_is_admin(Db),
>      gen_server:call(Pid, {set_revs_limit, Limit}, infinity);
>  set_revs_limit(_Db, _Limit) ->
> 
> Making sure that revs_limit = 0 doesn't wipe out the whole database:
> 
> --- couchdb-0.11.0/src/couchdb/couch_key_tree.erl       2009-11-21 10:43:43.000000000 -0300
> +++ couchdb-0.11.0.new/src/couchdb/couch_key_tree.erl   2010-05-04 17:40:57.000000000 -0300
> @@ -314,7 +314,7 @@
>      % flatten each branch in a tree into a tree path
>      Paths = get_all_leafs_full(Trees),
>  
> -    Paths2 = [{Pos, lists:sublist(Path, Limit)} || {Pos, Path} <- Paths],
> +    Paths2 = [{Pos, lists:sublist(Path, lists:max([Limit, 1]))} || {Pos, Path} <- Paths],
>  
>      % convert paths back to trees
>      lists:foldl(
> 
> Now the trickier part, choosing a good place to filter out the deleted 
> docs.... After several failed attempts I chose this:
> 
> --- couchdb-0.11.0/src/couchdb/couch_db_updater.erl     2010-02-22 12:20:53.000000000 -0300
> +++ couchdb-0.11.0.new/src/couchdb/couch_db_updater.erl 2010-05-05 09:19:50.000000000 -0300
> @@ -736,9 +736,16 @@
>          end, Tree).
>              
>  
> -copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
> -    Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
> -    LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids),
> +copy_docs(#db{revs_limit=Limit}=Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
> +    if Limit > 0 ->
> +      Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
> +      LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids);
> +    true ->
> +      AllIds = [Id || #doc_info{id=Id} <- InfoBySeq],
> +      BaseResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, AllIds),
> +      LookupResults = [Filtered || {ok, #full_doc_info{deleted=false}}=Filtered <- BaseResults],
> +      Ids = [Id || #full_doc_info{id=Id} <- LookupResults]
> +    end,
>  
>      % write out the attachments
>      NewFullDocInfos0 = lists:map(
> 
> Now I can simply:
> 
> curl -X PUT 'localhost:5984/db/_revs_limit' -d '0'
> 
> and then:
> 
> curl -X POST 'localhost:5984/db/_compact'
> 
> Hopefully I haven't broken anything - I am still running some tests but
> it looks like I am able to do what I need and stay 100% available without
> altering my application.
> 
> I am not suggesting that anyone use this as it is but it might trigger 
> someone to incorporate something in couchdb that has a similar 
> functionality.
> 
> Best regards,
> 
> Mike
> 
> 
> On Tue, 2010-05-04 at 09:09 -0700, J Chris Anderson wrote:
> > On May 3, 2010, at 8:56 AM, Mike Leddy wrote:
> > 
> > > Hi,
> > > 
> > > I am currently on couchdb 0.11.0 using official debian packages with
> > > erlang 1:13.b.4-dfsg-4 and I am having problems purging old documents.
> > > 
> > > My database is constantly receiving new data and old data (more than
> > > six weeks) is being deleted. I have been running like this for several
> > > months and the overhead of old deleted document 'stubs' is becoming
> > > relevant in day to day operations such as new replications, database
> > > compaction etc.
> > > 
> > > I decided that it would be best to purge the old deleted documents
> > > so that the database would compact better and only contain relevant
> > > ie. recent data.
> > > 
> > > [What I would really like would be a compact that does not include
> > > documents that match a filter function, then I could do this on each
> > > node independently.]
> > > 
> > > Unfortunately I am encountering problems purging the documents. I wrote
> > > a script to process all the documents via _changes and purge the old
> > > documents but I keep hitting documents that cannot be purged.
> > > 
> > 
> > Thanks for the bug report. One way I've seen people accomplish this use case that
> > doesn't involve purging is by storing documents into a new database each week, and
> > then throwing out old database files. 
> > 
> > Purging is really designed for removing secret data that was accidentally saved,
> > more than for reclaiming space. Thanks for the bug report - hopefully it will be easy to fix.
> > 
> > Chris
> > 
> > > Here is the start of my changes feed:
> > > 
> > > curl 'localhost:5984/iris/_changes?limit=5&since=0'
> > > {"results":[
> > > {"seq":2,"id":"_design/admin","changes":[{"rev":"1-ea95c1898a2c779d664c1d1b71a24f33"}]},
> > > {"seq":22435808,"id":"1259540160F2016","changes":[{"rev":"2-7dcfd742f74c79286c3f3093595a83df"}],"deleted":true},
> > > {"seq":22435809,"id":"1259540640F2016","changes":[{"rev":"2-6bd122eb9f83c0838bc9875a1b73abaf"}],"deleted":true},
> > > {"seq":22435810,"id":"1259616780F2443","changes":[{"rev":"2-53e2311f5de7058fbfd55979816d3efc"}],"deleted":true},
> > > {"seq":22435811,"id":"1259616784F2443","changes":[{"rev":"2-caaff4cd1290f7807c2bcfeb6edc39e0"}],"deleted":true}
> > > ],
> > > "last_seq":22435811}
> > > 
> > > This is a compacted copy of my main production database which is
> > > already on seq 106280009.
> > > 
> > > When i try to purge I get a badarity error:
> > > 
> > > curl -X POST 'localhost:5984/iris/_purge' -d
> > > '{"1259540160F2016":["2-7dcfd742f74c79286c3f3093595a83df"]}'
> > > {"error":"{{badarity,{#Fun<couch_db_updater.25.101160745>,\n
> > > [{2,<<124,230,79,165,16,199,208,127,32,211,160,223,180,12,3,28>>},\n
> > > {true,4185,19290621}]}},\n [{couch_key_tree,map_leafs_simple,3},\n
> > > {couch_key_tree,map_leafs_simple,3},\n  {couch_key_tree,map_leafs,2},\n
> > > {couch_db_updater,'-handle_call/3-fun-2-',2},\n  {lists,mapfoldl,3},\n
> > > {couch_db_updater,handle_call,3},\n  {gen_server,handle_msg,5},\n
> > > {proc_lib,init_p_do_apply,3}]}","reason":"{gen_server,call,\n
> > > [<0.28323.1>,\n             {purge_docs,[{<<\"1259540160F2016\">>,\n
> > > [{2,\n
> > > <<125,207,215,66,247,76,121,40,108,63,48,\n
> > > 147,89,90,131,223>>}]}]}]}"}
> > > 
> > > This is what appears in my server log:
> > > 
> > > [Mon, 03 May 2010 15:45:03 GMT] [error] [<0.28323.1>] ** Generic server
> > > <0.28323.1> terminating 
> > > ** Last message in was {purge_docs,[{<<"1259540160F2016">>,
> > >                                     [{2,
> > > 
> > > <<125,207,215,66,247,76,121,40,108,
> > >                                         63,48,147,89,90,131,223>>}]}]}
> > > ** When Server state == {db,<0.28322.1>,<0.28323.1>,nil,
> > > 
> > > <<"1272901503155514">>,<0.28320.1>,<0.28324.1>,
> > >                            {db_header,5,106014165,0,
> > >                                {59608213341,{24516895,36227125}},
> > >                                {59608203141,60744020},
> > >                                {59608273696,[]},
> > >                                0,nil,nil,1000},
> > >                            106014165,
> > >                            {btree,<0.28320.1>,
> > >                                {59608213341,{24516895,36227125}},
> > >                                #Fun<couch_db_updater.7.132302543>,
> > >                                #Fun<couch_db_updater.8.107957134>,
> > >                                #Fun<couch_btree.5.124754102>,
> > >                                #Fun<couch_db_updater.9.46112288>},
> > >                            {btree,<0.28320.1>,
> > >                                {59608203141,60744020},
> > >                                #Fun<couch_db_updater.10.19027664>,
> > >                                #Fun<couch_db_updater.11.35033879>,
> > >                                #Fun<couch_btree.5.124754102>,
> > >                                #Fun<couch_db_updater.12.56344865>},
> > >                            {btree,<0.28320.1>,
> > >                                {59608273696,[]},
> > >                                #Fun<couch_btree.0.83553141>,
> > >                                #Fun<couch_btree.1.30790806>,
> > >                                #Fun<couch_btree.2.124754102>,nil},
> > >                            106014165,<<"iris">>,
> > > 
> > > "/var/lib/couchdb/0.11.0/iris.couch",[],[],nil,
> > >                            {user_ctx,null,[],undefined},
> > >                            nil,1000,
> > >                            [before_header,after_header,on_file_open]}
> > > ** Reason for termination == 
> > > ** {{badarity,{#Fun<couch_db_updater.25.101160745>,
> > >               [{2,
> > > 
> > > <<124,230,79,165,16,199,208,127,32,211,160,223,180,12,3,28>>},
> > >                {true,4185,19290621}]}},
> > >    [{couch_key_tree,map_leafs_simple,3},
> > >     {couch_key_tree,map_leafs_simple,3},
> > >     {couch_key_tree,map_leafs,2},
> > >     {couch_db_updater,'-handle_call/3-fun-2-',2},
> > >     {lists,mapfoldl,3},
> > >     {couch_db_updater,handle_call,3},
> > >     {gen_server,handle_msg,5},
> > >     {proc_lib,init_p_do_apply,3}]}
> > > 
> > > 
> > > [Mon, 03 May 2010 15:45:03 GMT] [error] [<0.28323.1>]
> > > {error_report,<0.31.0>,
> > >    {<0.28323.1>,crash_report,
> > >     [[{initial_call,{couch_db_updater,init,['Argument__1']}},
> > >       {pid,<0.28323.1>},
> > >       {registered_name,[]},
> > >       {error_info,
> > >           {exit,
> > >               {{badarity,
> > >                    {#Fun<couch_db_updater.25.101160745>,
> > >                     [{2,
> > > 
> > > <<124,230,79,165,16,199,208,127,32,211,160,223,180,12,
> > >                         3,28>>},
> > >                      {true,4185,19290621}]}},
> > >                [{couch_key_tree,map_leafs_simple,3},
> > >                 {couch_key_tree,map_leafs_simple,3},
> > >                 {couch_key_tree,map_leafs,2},
> > >                 {couch_db_updater,'-handle_call/3-fun-2-',2},
> > >                 {lists,mapfoldl,3},
> > >                 {couch_db_updater,handle_call,3},
> > >                 {gen_server,handle_msg,5},
> > >                 {proc_lib,init_p_do_apply,3}]},
> > > 
> > > [{gen_server,terminate,6},{proc_lib,init_p_do_apply,3}]}},
> > >       {ancestors,
> > > 
> > > [<0.28322.1>,couch_server,couch_primary_services,couch_server_sup,
> > >            <0.32.0>]},
> > >       {messages,[]},
> > >       {links,[<0.28322.1>]},
> > >       {dictionary,[]},
> > >       {trap_exit,false},
> > >       {status,running},
> > >       {heap_size,4181},
> > >       {stack_size,24},
> > >       {reductions,42618}],
> > >      []]}}
> > > 
> > > [Mon, 03 May 2010 15:45:03 GMT] [error] [<0.28291.1>] Uncaught error in
> > > HTTP request: {exit,
> > >                                 {{{badarity,
> > > 
> > > {#Fun<couch_db_updater.25.101160745>,
> > >                                     [{2,
> > > 
> > > <<124,230,79,165,16,199,208,127,32,211,
> > >                                         160,223,180,12,3,28>>},
> > >                                      {true,4185,19290621}]}},
> > >                                   [{couch_key_tree,map_leafs_simple,3},
> > >                                    {couch_key_tree,map_leafs_simple,3},
> > >                                    {couch_key_tree,map_leafs,2},
> > >                                    {couch_db_updater,
> > >                                     '-handle_call/3-fun-2-',2},
> > >                                    {lists,mapfoldl,3},
> > >                                    {couch_db_updater,handle_call,3},
> > >                                    {gen_server,handle_msg,5},
> > >                                    {proc_lib,init_p_do_apply,3}]},
> > >                                  {gen_server,call,
> > >                                   [<0.28323.1>,
> > >                                    {purge_docs,
> > >                                     [{<<"1259540160F2016">>,
> > >                                       [{2,
> > >                                         <<125,207,215,66,247,76,121,
> > >                                           40,108,63,48,147,89,90,131,
> > >                                           223>>}]}]}]}}}
> > > 
> > > [Mon, 03 May 2010 15:45:03 GMT] [info] [<0.28291.1>] Stacktrace:
> > > [{gen_server,call,2},
> > >             {couch_httpd_db,db_req,2},
> > >             {couch_httpd_db,do_db_req,2},
> > >             {couch_httpd,handle_request_int,5},
> > >             {mochiweb_http,headers,5},
> > >             {proc_lib,init_p_do_apply,3}]
> > > 
> > > [Mon, 03 May 2010 15:45:03 GMT] [info] [<0.28291.1>] 127.0.0.1 - -
> > > 'POST' /iris/_purge 500
> > > 
> > > Any suggestions would be greatly appreciated.
> > > 
> > > Thanks,
> > > 
> > > Mike
> > > 
> > > 
> > > 
> > > 
> > 
> > 
> 
> 
> 
> 



Mime
View raw message