Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add batch view method for faster pagination #156

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions lib/couchrest/model/designs/view.rb
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,50 @@ def current_page
(offset_value / limit_value) + 1
end

# == 'Fast Paging', a.k.a. Batch Loading
#
# Pagination using just skip and limit can be very slow, as CouchDB
# still needs to read all the rows of the view that it skips. That's
# why the CouchDB team recommends using the skip parameter only with
# "single digit values." The batch method follows the 'Fast Paging'
# recipe [1], using startkey and startkey_docid for faster pagination.
#
# Note that 'Fast Paging' does not allow jumping directly to a specific
# page within the results. If that is required, the page method should
# be used instead.
#
# Use of the batch method is recommended especially for use cases where
# it is necessary to load all the documents in a view for processing,
# but loading them all at once is undesirable, e.g. due to memory
# constraints.
#
# batch_size is the maximum number of rows requested per query and must
# be a positive Integer. The block is called once per non-empty batch
# with the result of +docs+ for that batch. Without a block an
# Enumerator over the batches is returned.
#
# Raises a RuntimeError if the view already has limit, skip, startkey
# or startkey_docid set, and an ArgumentError if batch_size is not a
# positive Integer (a size of zero or less would never satisfy the
# loop's termination condition).
#
# The paging options this method adds to the query are removed again
# when iteration finishes (or is aborted by an exception), so the view
# can be reused afterwards.
#
# [1] http://guide.couchdb.org/draft/recipes.html#pagination
def batch(batch_size, &block)
  raise "View#batch cannot be used with limit or skip options" if query[:limit] || query[:skip]
  raise "View#batch cannot be used with startkey option" if query[:startkey] || query[:startkey_docid]
  unless batch_size.is_a?(Integer) && batch_size > 0
    raise ArgumentError, "View#batch requires a positive integer batch size"
  end

  return to_enum(:batch, batch_size) unless block_given?

  # The guards above guarantee none of these options were in use on entry,
  # so deleting them afterwards restores the caller's query.
  # NOTE(review): assumes query is a Hash (responds to #delete) - confirm.
  paging_keys = [:limit, :skip, :startkey, :startkey_docid]

  begin
    query[:limit] = batch_size
    last = nil
    more = true

    while more
      # Drop the previous batch so the next execute issues a fresh request.
      reset!

      if last
        # Resume at the last row already seen and skip just that one row.
        query[:skip] = 1
        query[:startkey] = last.key
        query[:startkey_docid] = last.id
      end
      execute

      batch_rows = rows
      unless batch_rows.empty?
        last = batch_rows.last
        yield docs
      end

      # A short (or empty) batch means the end of the view was reached.
      more = batch_rows.length >= batch_size
    end
  ensure
    paging_keys.each { |key| query.delete(key) }
    # The cached result belongs to the paged query; discard it so later
    # calls on this view run with the restored options.
    reset!
  end
end

protected

def include_docs!
Expand Down
50 changes: 47 additions & 3 deletions spec/unit/designs/view_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,33 @@ class DesignViewModel < CouchRest::Model::Base
end
end

# Unit specs for View#batch: the paging loop and the guards that reject
# query options which conflict with batch loading.
describe "#batch" do
  it "should execute query repeatedly until number of results is less than batch size" do
    # The first page is full (2 rows), so its last row must be asked for
    # the key and id used to resume; the second page is short and ends
    # the loop.
    row = mock("Rock")
    row.should_receive(:key)
    row.should_receive(:id)
    @obj.should_receive(:execute).twice.and_return(true)
    @obj.should_receive(:rows).twice.and_return([nil, row], [nil])
    @obj.should_receive(:docs).twice
    @obj.batch(2) {}
  end
  it "should return an enumerator when called without a block" do
    @obj.batch(2).should be_kind_of(Enumerator)
  end
  # The guard specs pin the error class and message: a bare raise_error
  # would also pass on an unrelated failure such as a NoMethodError.
  it "should raise error if limit set" do
    @obj.query[:limit] = 5
    lambda { @obj.batch(2) }.should raise_error(RuntimeError, /limit or skip/)
  end
  it "should raise error if skip set" do
    @obj.query[:skip] = 5
    lambda { @obj.batch(2) }.should raise_error(RuntimeError, /limit or skip/)
  end
  it "should raise error if startkey set" do
    @obj.query[:startkey] = 'bar'
    lambda { @obj.batch(2) }.should raise_error(RuntimeError, /startkey/)
  end
  it "should raise error if startkey_docid set" do
    @obj.query[:startkey_docid] = 'bar'
    lambda { @obj.batch(2) }.should raise_error(RuntimeError, /startkey/)
  end
end

describe "#database" do
it "should update query with value" do
@obj.should_receive(:update_query).with({:database => 'foo'})
Expand Down Expand Up @@ -857,9 +884,6 @@ class DesignViewModel < CouchRest::Model::Base
end

describe "index information" do
it "should provide total_rows" do
DesignViewModel.by_name.total_rows.should eql(5)
end
it "should provide total_rows" do
DesignViewModel.by_name.total_rows.should eql(5)
end
Expand Down Expand Up @@ -914,6 +938,26 @@ class DesignViewModel < CouchRest::Model::Base
end
end

# Integration specs for View#batch against the by_name view.
describe "batch load" do
  before :each do
    @results = []
  end
  it "should return all the documents" do
    DesignViewModel.by_name.batch(3) do |docs|
      @results << docs
    end
    loaded = @results.flatten
    # zip iterates from the actual side, so without this count check the
    # spec would pass when too few (or no) documents were yielded.
    # NOTE(review): assumes @objs is the Array of fixture documents in
    # by_name order - confirm against the enclosing before block.
    loaded.length.should eql(@objs.length)
    loaded.zip(@objs).each do |actual, expected|
      actual.should eql(expected)
    end
  end
  it "should return the documents in batches of 2" do
    DesignViewModel.by_name.batch(2) do |docs|
      @results << docs
    end
    @results.map{ |b| b.length }.should eql([2, 2, 1])
  end
end

end


Expand Down