From abf92c4b7225d74ce540031ff87ef2ce56eec749 Mon Sep 17 00:00:00 2001 From: Jan Hecking Date: Sun, 7 Oct 2012 03:16:18 +0800 Subject: [PATCH 1/3] add batch view method for faster pagination 'Fast Paging', a.k.a. Batch Loading Pagination using just skip and limit can be very slow as CouchDB still needs to read all the rows of the view that it skips. That's why the CouchDB team recommends using the skip parameter only with "single digit values." The batch method follows the 'Fast Paging' recipe [1] for using startkey(_docid) for faster pagination. Note that 'Fast Paging' does not allow jumping directly to a specific page within the results. If that is required the page method should be used instead. Use of the batch method is recommended esp. for use cases where it is necessary to load all the documents in a view for processing but loading them all at once is undesirable, e.g. due to memory constraints. [1] http://guide.couchdb.org/draft/recipes.html#pagination --- lib/couchrest/model/designs/view.rb | 42 +++++++++++++++++++++++++++++ spec/unit/designs/view_spec.rb | 35 +++++++++++++++++++++--- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/lib/couchrest/model/designs/view.rb b/lib/couchrest/model/designs/view.rb index ff68a23e..270a4149 100644 --- a/lib/couchrest/model/designs/view.rb +++ b/lib/couchrest/model/designs/view.rb @@ -380,6 +380,48 @@ def current_page (offset_value / limit_value) + 1 end + # == 'Fast Paging', a.k.a. Batch Loading + # + # Pagination using just skip and limit can be very slow as CouchDB + # still needs to read all the rows of the view that it skips. That's + # why the CouchDB team recommends using the skip parameter only with + # "single digit values." The batch method follows the 'Fast Paging' + # recipe [1] for using startkey(_docid) for faster pagination. + # + # Note that 'Fast Paging' does not allow jumping directly to a specific + # page within the results. 
If that is required the page method should + # be used instead. + # + # Use of the batch method is recommended esp. for use cases where it is + # necessary to load all the documents in a view for processing but + # loading them all at once is undesirable, e.g. due to memory + # constraints. + # + # [1] http://guide.couchdb.org/draft/recipes.html#pagination + def batch(batch_size, &block) + raise "View#batch cannot be used with limit or skip options" if query[:limit] or query[:skip] + raise "View#batch cannot be used with startkey option" if query[:startkey] or query[:startkey_docid] + + query[:limit] = batch_size + + last = nil + begin + reset! + unless last.nil? + query[:skip] = 1 + query[:startkey] = last.key + query[:startkey_docid] = last.id + end + execute + + rows = self.rows + if rows.length > 0 + last = rows.last + yield docs + end + end until rows.length < batch_size + end + protected def include_docs! diff --git a/spec/unit/designs/view_spec.rb b/spec/unit/designs/view_spec.rb index bed7e8e3..1fdd6d7a 100644 --- a/spec/unit/designs/view_spec.rb +++ b/spec/unit/designs/view_spec.rb @@ -340,6 +340,18 @@ class DesignViewModel < CouchRest::Model::Base end end + describe "#batch" do + it "should execute query repeatedly until number of results is less than batch size" do + row = mock("Rock") + row.should_receive(:key) + row.should_receive(:id) + @obj.should_receive(:execute).twice.and_return(true) + @obj.should_receive(:rows).twice.and_return([nil, row], [nil]) + @obj.should_receive(:docs).twice + @obj.batch(2) {} + end + end + describe "#database" do it "should update query with value" do @obj.should_receive(:update_query).with({:database => 'foo'}) @@ -857,9 +869,6 @@ class DesignViewModel < CouchRest::Model::Base end describe "index information" do - it "should provide total_rows" do - DesignViewModel.by_name.total_rows.should eql(5) - end it "should provide total_rows" do DesignViewModel.by_name.total_rows.should eql(5) end @@ -914,6 +923,26 @@ class 
DesignViewModel < CouchRest::Model::Base end end + describe "batch load" do + before :each do + @results = [] + end + it "should return all the documents" do + DesignViewModel.by_name.batch(3) do |docs| + @results << docs + end + @results.flatten.zip(@objs).each do |actual, expected| + actual.should eql(expected) + end + end + it "should return the documents in batches of 2" do + DesignViewModel.by_name.batch(2) do |docs| + @results << docs + end + @results.map{ |b| b.length }.should eql([2, 2, 1]) + end + end + end From b1680823537d552bd9b624aec5628a30b6d1c7ce Mon Sep 17 00:00:00 2001 From: Jan Hecking Date: Fri, 12 Oct 2012 11:09:16 +0800 Subject: [PATCH 2/3] support Enumerators for batch loading When called without a block argument the batch() method should return an Enumerator object instead. --- lib/couchrest/model/designs/view.rb | 2 ++ spec/unit/designs/view_spec.rb | 3 +++ 2 files changed, 5 insertions(+) diff --git a/lib/couchrest/model/designs/view.rb b/lib/couchrest/model/designs/view.rb index 270a4149..c058b127 100644 --- a/lib/couchrest/model/designs/view.rb +++ b/lib/couchrest/model/designs/view.rb @@ -402,6 +402,8 @@ def batch(batch_size, &block) raise "View#batch cannot be used with limit or skip options" if query[:limit] or query[:skip] raise "View#batch cannot be used with startkey option" if query[:startkey] or query[:startkey_docid] + return to_enum(:batch, batch_size) unless block_given? 
+ query[:limit] = batch_size last = nil diff --git a/spec/unit/designs/view_spec.rb b/spec/unit/designs/view_spec.rb index 1fdd6d7a..d3d9afb7 100644 --- a/spec/unit/designs/view_spec.rb +++ b/spec/unit/designs/view_spec.rb @@ -350,6 +350,9 @@ class DesignViewModel < CouchRest::Model::Base @obj.should_receive(:docs).twice @obj.batch(2) {} end + it "should return an enumerator when called without a block" do + @obj.batch(2).should be_kind_of(Enumerator) + end end describe "#database" do From 5ab280bf88339acbc6dfea649908f6ac3ba4d098 Mon Sep 17 00:00:00 2001 From: Jan Hecking Date: Tue, 15 Jan 2013 19:37:13 +0800 Subject: [PATCH 3/3] add specs for exceptions raised by batch() --- spec/unit/designs/view_spec.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/spec/unit/designs/view_spec.rb b/spec/unit/designs/view_spec.rb index d3d9afb7..d5ef753d 100644 --- a/spec/unit/designs/view_spec.rb +++ b/spec/unit/designs/view_spec.rb @@ -353,6 +353,18 @@ class DesignViewModel < CouchRest::Model::Base it "should return an enumerator when called without a block" do @obj.batch(2).should be_kind_of(Enumerator) end + it "should raise error if limit set" do + @obj.query[:limit] = 5 + lambda { @obj.batch(2) }.should raise_error + end + it "should raise error if skip set" do + @obj.query[:skip] = 5 + lambda { @obj.batch(2) }.should raise_error + end + it "should raise error if startkey set" do + @obj.query[:startkey] = 'bar' + lambda { @obj.batch(2) }.should raise_error + end end describe "#database" do