incubator-couchdb-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Mike Leddy <m...@loop.com.br>
Subject Re: Having purge problems
Date Wed, 05 May 2010 13:42:15 GMT
Hi,

You are welcome.

I had imagined trying some sort of database swapping but I never liked 
the idea of switching where the app looks to the shadow/replacement and
doing it correctly while always available and replicating.

I decided to bite the bullet and went for my desired solution. It 
may be completely against the grain of good practice but I decided to 
patch couchdb to be able to purge deleted data on compaction.

Please note I am an Erlang/Couchdb newbie.

First it seemed reasonable to use revs_limit to trigger deleted document
removal as I also want to clear out older revisions as well. I decided
that a revs_limit = 0 might be appropriate.

Patching couch_db.erl to allow revs_limit = 0 gave me:

--- couchdb-0.11.0/src/couchdb/couch_db.erl     2010-03-04 02:17:44.000000000 -0300
+++ couchdb-0.11.0.new/src/couchdb/couch_db.erl 2010-05-04 17:18:54.000000000 -0300
@@ -323,7 +323,7 @@
 get_revs_limit(#db{revs_limit=Limit}) ->
     Limit.
 
-set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit > 0 ->
+set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit >= 0 ->
     check_is_admin(Db),
     gen_server:call(Pid, {set_revs_limit, Limit}, infinity);
 set_revs_limit(_Db, _Limit) ->

Making sure that revs_limit = 0 doesn't wipe out the whole database:

--- couchdb-0.11.0/src/couchdb/couch_key_tree.erl       2009-11-21 10:43:43.000000000 -0300
+++ couchdb-0.11.0.new/src/couchdb/couch_key_tree.erl   2010-05-04 17:40:57.000000000 -0300
@@ -314,7 +314,7 @@
     % flatten each branch in a tree into a tree path
     Paths = get_all_leafs_full(Trees),
 
-    Paths2 = [{Pos, lists:sublist(Path, Limit)} || {Pos, Path} <- Paths],
+    Paths2 = [{Pos, lists:sublist(Path, lists:max([Limit, 1]))} || {Pos, Path} <- Paths],
 
     % convert paths back to trees
     lists:foldl(

Now the trickier part, choosing a good place to filter out the deleted 
docs.... After several failed attempts I chose this:

--- couchdb-0.11.0/src/couchdb/couch_db_updater.erl     2010-02-22 12:20:53.000000000 -0300
+++ couchdb-0.11.0.new/src/couchdb/couch_db_updater.erl 2010-05-05 09:19:50.000000000 -0300
@@ -736,9 +736,16 @@
         end, Tree).
             
 
-copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
-    Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
-    LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids),
+copy_docs(#db{revs_limit=Limit}=Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) ->
+    if Limit > 0 ->
+      Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
+      LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids);
+    true ->
+      AllIds = [Id || #doc_info{id=Id} <- InfoBySeq],
+      BaseResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, AllIds),
+      LookupResults = [Filtered || {ok, #full_doc_info{deleted=false}}=Filtered <- BaseResults],
+      Ids = [Id || #full_doc_info{id=Id} <- LookupResults]
+    end,
 
     % write out the attachments
     NewFullDocInfos0 = lists:map(

Now I can simply:

curl -X PUT 'localhost:5984/db/_revs_limit' -d '0'

and then:

curl -X POST 'localhost:5984/db/_compact'

Hopefully I haven't broken anything - I am still running some tests but
it looks like I am able to do what I need and stay 100% available without
altering my application.

I am not suggesting that anyone use this as it is but it might trigger 
someone to incorporate something in couchdb that has similar 
functionality.

Best regards,

Mike


On Tue, 2010-05-04 at 09:09 -0700, J Chris Anderson wrote:
> On May 3, 2010, at 8:56 AM, Mike Leddy wrote:
> 
> > Hi,
> > 
> > I am currently on couchdb 0.11.0 using official debian packages with
> > erlang 1:13.b.4-dfsg-4 and I am having problems purging old documents.
> > 
> > My database is constantly receiving new data and old data (more than
> > six weeks) is being deleted. I have been running like this for several
> > months and the overhead of old deleted document 'stubs' is becoming
> > relevant in day to day operations such as new replications, database
> > compaction etc.
> > 
> > I decided that it would be best to purge the old deleted documents
> > so that the database would compact better and only contain relevant
> > ie. recent data.
> > 
> > [What I would really like would be a compact that does not include
> > documents that match a filter function, then I could do this on each
> > node independently.]
> > 
> > Unfortunately I am encountering problems purging the documents. I wrote
> > a script to process all the documents via _changes and purge the old
> > documents but I keep hitting documents that cannot be purged.
> > 
> 
> Thanks for the bug report. One way I've seen people accomplish this use case that doesn't
> involve purging, is by storing documents into a new database each week, and then throwing
> out old database files. 
> 
> Purging is really designed for removing secret data that was accidentally saved, more
> than for reclaiming space. Thanks for the bug report - hopefully it will be easy to fix.
> 
> Chris
> 
> > Here is the start of my changes feed:
> > 
> > curl 'localhost:5984/iris/_changes?limit=5&since=0'
> > {"results":[
> > {"seq":2,"id":"_design/admin","changes":[{"rev":"1-ea95c1898a2c779d664c1d1b71a24f33"}]},
> > {"seq":22435808,"id":"1259540160F2016","changes":[{"rev":"2-7dcfd742f74c79286c3f3093595a83df"}],"deleted":true},
> > {"seq":22435809,"id":"1259540640F2016","changes":[{"rev":"2-6bd122eb9f83c0838bc9875a1b73abaf"}],"deleted":true},
> > {"seq":22435810,"id":"1259616780F2443","changes":[{"rev":"2-53e2311f5de7058fbfd55979816d3efc"}],"deleted":true},
> > {"seq":22435811,"id":"1259616784F2443","changes":[{"rev":"2-caaff4cd1290f7807c2bcfeb6edc39e0"}],"deleted":true}
> > ],
> > "last_seq":22435811}
> > 
> > This is a compacted copy of my main production database which is
> > already on seq 106280009.
> > 
> > When I try to purge I get a badarity error:
> > 
> > curl -X POST 'localhost:5984/iris/_purge' -d
> > '{"1259540160F2016":["2-7dcfd742f74c79286c3f3093595a83df"]}'
> > {"error":"{{badarity,{#Fun<couch_db_updater.25.101160745>,\n
> > [{2,<<124,230,79,165,16,199,208,127,32,211,160,223,180,12,3,28>>},\n
> > {true,4185,19290621}]}},\n [{couch_key_tree,map_leafs_simple,3},\n
> > {couch_key_tree,map_leafs_simple,3},\n  {couch_key_tree,map_leafs,2},\n
> > {couch_db_updater,'-handle_call/3-fun-2-',2},\n  {lists,mapfoldl,3},\n
> > {couch_db_updater,handle_call,3},\n  {gen_server,handle_msg,5},\n
> > {proc_lib,init_p_do_apply,3}]}","reason":"{gen_server,call,\n
> > [<0.28323.1>,\n             {purge_docs,[{<<\"1259540160F2016\">>,\n
> > [{2,\n
> > <<125,207,215,66,247,76,121,40,108,63,48,\n
> > 147,89,90,131,223>>}]}]}]}"}
> > 
> > This is what appears in my server log:
> > 
> > [Mon, 03 May 2010 15:45:03 GMT] [error] [<0.28323.1>] ** Generic server
> > <0.28323.1> terminating 
> > ** Last message in was {purge_docs,[{<<"1259540160F2016">>,
> >                                     [{2,
> > 
> > <<125,207,215,66,247,76,121,40,108,
> >                                         63,48,147,89,90,131,223>>}]}]}
> > ** When Server state == {db,<0.28322.1>,<0.28323.1>,nil,
> > 
> > <<"1272901503155514">>,<0.28320.1>,<0.28324.1>,
> >                            {db_header,5,106014165,0,
> >                                {59608213341,{24516895,36227125}},
> >                                {59608203141,60744020},
> >                                {59608273696,[]},
> >                                0,nil,nil,1000},
> >                            106014165,
> >                            {btree,<0.28320.1>,
> >                                {59608213341,{24516895,36227125}},
> >                                #Fun<couch_db_updater.7.132302543>,
> >                                #Fun<couch_db_updater.8.107957134>,
> >                                #Fun<couch_btree.5.124754102>,
> >                                #Fun<couch_db_updater.9.46112288>},
> >                            {btree,<0.28320.1>,
> >                                {59608203141,60744020},
> >                                #Fun<couch_db_updater.10.19027664>,
> >                                #Fun<couch_db_updater.11.35033879>,
> >                                #Fun<couch_btree.5.124754102>,
> >                                #Fun<couch_db_updater.12.56344865>},
> >                            {btree,<0.28320.1>,
> >                                {59608273696,[]},
> >                                #Fun<couch_btree.0.83553141>,
> >                                #Fun<couch_btree.1.30790806>,
> >                                #Fun<couch_btree.2.124754102>,nil},
> >                            106014165,<<"iris">>,
> > 
> > "/var/lib/couchdb/0.11.0/iris.couch",[],[],nil,
> >                            {user_ctx,null,[],undefined},
> >                            nil,1000,
> >                            [before_header,after_header,on_file_open]}
> > ** Reason for termination == 
> > ** {{badarity,{#Fun<couch_db_updater.25.101160745>,
> >               [{2,
> > 
> > <<124,230,79,165,16,199,208,127,32,211,160,223,180,12,3,28>>},
> >                {true,4185,19290621}]}},
> >    [{couch_key_tree,map_leafs_simple,3},
> >     {couch_key_tree,map_leafs_simple,3},
> >     {couch_key_tree,map_leafs,2},
> >     {couch_db_updater,'-handle_call/3-fun-2-',2},
> >     {lists,mapfoldl,3},
> >     {couch_db_updater,handle_call,3},
> >     {gen_server,handle_msg,5},
> >     {proc_lib,init_p_do_apply,3}]}
> > 
> > 
> > [Mon, 03 May 2010 15:45:03 GMT] [error] [<0.28323.1>]
> > {error_report,<0.31.0>,
> >    {<0.28323.1>,crash_report,
> >     [[{initial_call,{couch_db_updater,init,['Argument__1']}},
> >       {pid,<0.28323.1>},
> >       {registered_name,[]},
> >       {error_info,
> >           {exit,
> >               {{badarity,
> >                    {#Fun<couch_db_updater.25.101160745>,
> >                     [{2,
> > 
> > <<124,230,79,165,16,199,208,127,32,211,160,223,180,12,
> >                         3,28>>},
> >                      {true,4185,19290621}]}},
> >                [{couch_key_tree,map_leafs_simple,3},
> >                 {couch_key_tree,map_leafs_simple,3},
> >                 {couch_key_tree,map_leafs,2},
> >                 {couch_db_updater,'-handle_call/3-fun-2-',2},
> >                 {lists,mapfoldl,3},
> >                 {couch_db_updater,handle_call,3},
> >                 {gen_server,handle_msg,5},
> >                 {proc_lib,init_p_do_apply,3}]},
> > 
> > [{gen_server,terminate,6},{proc_lib,init_p_do_apply,3}]}},
> >       {ancestors,
> > 
> > [<0.28322.1>,couch_server,couch_primary_services,couch_server_sup,
> >            <0.32.0>]},
> >       {messages,[]},
> >       {links,[<0.28322.1>]},
> >       {dictionary,[]},
> >       {trap_exit,false},
> >       {status,running},
> >       {heap_size,4181},
> >       {stack_size,24},
> >       {reductions,42618}],
> >      []]}}
> > 
> > [Mon, 03 May 2010 15:45:03 GMT] [error] [<0.28291.1>] Uncaught error in
> > HTTP request: {exit,
> >                                 {{{badarity,
> > 
> > {#Fun<couch_db_updater.25.101160745>,
> >                                     [{2,
> > 
> > <<124,230,79,165,16,199,208,127,32,211,
> >                                         160,223,180,12,3,28>>},
> >                                      {true,4185,19290621}]}},
> >                                   [{couch_key_tree,map_leafs_simple,3},
> >                                    {couch_key_tree,map_leafs_simple,3},
> >                                    {couch_key_tree,map_leafs,2},
> >                                    {couch_db_updater,
> >                                     '-handle_call/3-fun-2-',2},
> >                                    {lists,mapfoldl,3},
> >                                    {couch_db_updater,handle_call,3},
> >                                    {gen_server,handle_msg,5},
> >                                    {proc_lib,init_p_do_apply,3}]},
> >                                  {gen_server,call,
> >                                   [<0.28323.1>,
> >                                    {purge_docs,
> >                                     [{<<"1259540160F2016">>,
> >                                       [{2,
> >                                         <<125,207,215,66,247,76,121,
> >                                           40,108,63,48,147,89,90,131,
> >                                           223>>}]}]}]}}}
> > 
> > [Mon, 03 May 2010 15:45:03 GMT] [info] [<0.28291.1>] Stacktrace:
> > [{gen_server,call,2},
> >             {couch_httpd_db,db_req,2},
> >             {couch_httpd_db,do_db_req,2},
> >             {couch_httpd,handle_request_int,5},
> >             {mochiweb_http,headers,5},
> >             {proc_lib,init_p_do_apply,3}]
> > 
> > [Mon, 03 May 2010 15:45:03 GMT] [info] [<0.28291.1>] 127.0.0.1 - -
> > 'POST' /iris/_purge 500
> > 
> > Any suggestions would be greatly appreciated.
> > 
> > Thanks,
> > 
> > Mike
> > 
> > 
> > 
> > 
> 
> 




Mime
View raw message