From dc9ff9b937eb8e1608d43e175b8f2ecdcbe3578d Mon Sep 17 00:00:00 2001 From: Chris Thompson Date: Wed, 13 Apr 2022 14:19:15 -0600 Subject: [PATCH] Allow dumping and loading bitarrays --- README.md | 20 +++++++++++ lib/bitarray.rb | 1 + lib/bitarray/bit_array.rb | 37 ++++++++++++++++++- lib/bitarray/bit_array_file.rb | 38 ++++++++++++++++++++ test/test_bit_array.rb | 45 +++++++++++++++++++++++ test/test_bit_array_file.rb | 65 ++++++++++++++++++++++++++++++++++ 6 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 lib/bitarray/bit_array_file.rb create mode 100644 test/test_bit_array_file.rb diff --git a/README.md b/README.md index c59cd58..846aa22 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,28 @@ ba = BitArray.new(16, ["0000111111110000"].pack('B*'), reverse_byte: false) ba.to_s # "0000111111110000" ``` +Saving and loading `BitArray`: + +```ruby +ba = BitArray.new(16, ["0000111111110000"].pack('B*')) +ba.dump(File.new("bitarray.dat", "w")) +#=> #<File:bitarray.dat> +ba = BitArray.load(File.open("bitarray.dat")) +ba.to_s # "1111000000001111" +``` + +Read-only access without loading it into memory: + +```ruby +ba = BitArray.new(16, ["0000111111110000"].pack('B*')) +ba.dump(File.new("bitarray.dat", "w")) +ba_ro = BitArrayFile.new(filename: "bitarray.dat") +ba_ro[0] # 1 +ba_ro[4] # 0 +``` ## History +- 1.4 in 2022 (cleanups, add unions, dump/load, and BitArrayFile) - 1.3 in 2022 (cleanups and a minor perf tweak) - 1.2 in 2018 (Added option to skip reverse the bits for each byte by @dalibor) - 1.1 in 2018 (fixed a significant bug) diff --git a/lib/bitarray.rb b/lib/bitarray.rb index e70e66c..99fd42e 100644 --- a/lib/bitarray.rb +++ b/lib/bitarray.rb @@ -1 +1,2 @@ require_relative "bitarray/bit_array" +require_relative "bitarray/bit_array_file" diff --git a/lib/bitarray/bit_array.rb b/lib/bitarray/bit_array.rb index 476c34a..f4516ec 100644 --- a/lib/bitarray/bit_array.rb +++ b/lib/bitarray/bit_array.rb @@ -2,7 +2,8 @@ class BitArray attr_reader :field, :reverse_byte, 
class BitArray
  VERSION = "1.4.0"

  # Number of bytes in the header written by #dump and consumed by .load:
  # an 8-byte unsigned integer holding @size (pack directive "Q") followed by
  # a 1-byte flag holding @reverse_byte (pack directive "C").
  HEADER_LENGTH = 8 + 1

  # Compares two bitarrays for equality.
  #
  # Two bitarrays are equal when their size, their reverse_byte setting and
  # the raw bytes of their backing strings all match. The backing strings are
  # compared as binary so that the result does not depend on the encoding the
  # bytes happen to be tagged with (for example after reading them back from
  # an io object).
  #
  # @param rhs [Object] the object to compare against
  # @return [Boolean] false for anything that is not a BitArray
  def ==(rhs)
    return false unless rhs.is_a?(BitArray)

    @size == rhs.size && @reverse_byte == rhs.reverse_byte && @field.b == rhs.field.b
  end

  # Allows joining (union) two bitarrays of identical size.
  # The resulting bitarray will contain any bit set in either constituent arrays.
  # |= is implicitly defined, so you can do source_ba |= other_ba
  #
  # @param rhs [BitArray] the bitarray to combine with
  # @return [BitArray] a new bitarray; neither operand is modified
  # @raise [ArgumentError] if the sizes or reverse_byte settings differ
  def |(rhs)
    raise ArgumentError, "Bitarray sizes must be identical" if @size != rhs.size
    raise ArgumentError, "Reverse byte settings must be identical" if @reverse_byte != rhs.reverse_byte

    # Hand the new instance its own copy of the bytes so the setbyte calls
    # below can never write through to this object's backing string.
    combined = BitArray.new(@size, @field.dup, reverse_byte: @reverse_byte)
    rhs.field.each_byte.with_index do |byte, byte_pos|
      combined.field.setbyte(byte_pos, combined.field.getbyte(byte_pos) | byte)
    end

    combined
  end

  # Translates a bit position into the value whose remainder modulo 8 selects
  # the bit inside its byte: the position itself when @reverse_byte is set,
  # otherwise 7 - position.
  private def byte_position(position)
    @reverse_byte ? position : 7 - position
  end

  # Save contents to an io device such as a file.
  #
  # Writes the HEADER_LENGTH-byte header followed by the raw bytes of the
  # bit field. The "Q" pack directive uses the host's byte order, so a dump
  # is only guaranteed to be readable on a machine with the same endianness.
  #
  # @param io [#write] destination; should be in binary mode
  # @return [Object] the io that was passed in
  def dump(io)
    io.write([@size, @reverse_byte ? 1 : 0].pack("QC"))
    io.write(@field.b)
    io
  end

  # Load bitarray from an io device such as a file.
  #
  # @param io [#read] source positioned at the start of a dump
  # @return [BitArray]
  # @raise [EOFError] if the header or the bit data is shorter than expected
  def self.load(io)
    header = io.read(HEADER_LENGTH)
    if header.nil? || header.bytesize < HEADER_LENGTH
      raise EOFError, "BitArray header is truncated (expected #{HEADER_LENGTH} bytes)"
    end

    size, reverse_byte = header.unpack("QC")
    field = io.read
    # Bit n lives in byte n >> 3, so size bits need (size + 7) / 8 bytes.
    if field.bytesize < (size + 7) / 8
      raise EOFError, "BitArray data is truncated (#{field.bytesize} bytes for #{size} bits)"
    end

    new(size, field, reverse_byte: reverse_byte == 1)
  end
end
# --- lib/bitarray/bit_array_file.rb ---

# Read-only access to a BitArray dumped to disk.
# Every bit lookup seeks in the underlying io and reads a single byte, so
# nothing but the header is held in memory.
class BitArrayFile
  HEADER_LENGTH = BitArray::HEADER_LENGTH

  attr_reader :io, :reverse_byte, :size

  # @param filename [String, nil] path of a dump to open; ignored when io is given
  # @param io [#seek, #read, #getbyte, nil] an already opened dump
  # @raise [ArgumentError] if neither filename nor io is given
  # @raise [EOFError] if the source is too short to contain a header
  def initialize(filename: nil, io: nil)
    raise ArgumentError, "Must specify a filename or io argument" unless io || filename

    # Remember whether the handle is ours so #close never closes a caller's io.
    @owns_io = !io
    # Binary mode: the dump is raw bytes, so no newline or encoding
    # translation may be applied while reading it.
    @io = io || File.open(filename, "rb")

    @io.seek(0)
    header = @io.read(HEADER_LENGTH)
    if header.nil? || header.bytesize < HEADER_LENGTH
      close
      raise EOFError, "BitArray header is truncated (expected #{HEADER_LENGTH} bytes)"
    end

    @size, flag = header.unpack("QC")
    @reverse_byte = flag != 0
  end

  # Read a bit (1/0)
  #
  # @param position [Integer] zero-based bit index
  # @return [Integer] 1 or 0
  # @raise [IndexError] if position is negative or lies past the stored data
  def [](position)
    # A negative index would seek backwards into the header and report
    # header bits as data.
    raise IndexError, "position #{position} is negative" if position < 0

    seek_to(position >> 3)
    byte = @io.getbyte
    raise IndexError, "position #{position} is beyond the end of the data" if byte.nil?

    (byte & (1 << (byte_position(position) % 8))) > 0 ? 1 : 0
  end

  # Closes the underlying file when it was opened by this object from a
  # filename. An io supplied by the caller is left open.
  #
  # @return [nil]
  def close
    @io.close if @owns_io && !@io.closed?
    nil
  end

  # Same bit-order translation as BitArray: the position itself when
  # @reverse_byte is set, otherwise 7 - position.
  private def byte_position(position)
    @reverse_byte ? position : 7 - position
  end

  # Positions the io on the data byte with the given index, skipping the header.
  private def seek_to(position)
    @io.seek(position + HEADER_LENGTH)
  end
end

# --- test/test_bit_array.rb (tests added to the existing class) ---

class TestBitArray < Minitest::Test
  # A dumped bitarray must load back as an equal bitarray.
  def test_dump_load
    ba_dump = BitArray.new(35)
    [1, 5, 6, 7, 10, 16, 33].each { |i| ba_dump[i] = 1 }
    Tempfile.create("bit_array.dat") do |io|
      ba_dump.dump(io)
      io.rewind
      ba_load = BitArray.load(io)

      assert_equal ba_dump, ba_load
    end
  end

  # The union holds every bit set in either operand.
  def test_union
    set_bits = [1, 5, 6, 7, 10, 16, 33].shuffle

    ba_lhs = BitArray.new(35)
    set_bits[0..3].each { |i| ba_lhs[i] = 1 }
    ba_rhs = BitArray.new(35)
    # Deliberately overlap a little
    set_bits[3..-1].each { |i| ba_rhs[i] = 1 }
    ba_expected = BitArray.new(35)
    set_bits.each { |i| ba_expected[i] = 1 }

    assert_equal ba_expected, ba_lhs | ba_rhs
  end

  def test_union_unequal_sizes
    ba_lhs = BitArray.new(4)
    ba_rhs = BitArray.new(5)

    assert_raises ArgumentError do
      ba_lhs | ba_rhs
    end
  end

  def test_union_unequal_reverse_bytes
    ba_lhs = BitArray.new(4, reverse_byte: true)
    ba_rhs = BitArray.new(4, reverse_byte: false)

    assert_raises ArgumentError do
      ba_lhs | ba_rhs
    end
  end
end

# --- test/test_bit_array_file.rb ---

# Shared fixture and checks for the BitArrayFile tests. Including classes
# provide #reverse_byte to choose the bit order of the dumped array.
module BitArrayFileFixture
  SIZE = 35
  SET_BITS = [1, 5, 6, 7, 10, 16, 33].freeze

  def setup
    ba = BitArray.new(SIZE, reverse_byte: reverse_byte)
    SET_BITS.each { |i| ba[i] = 1 }
    @file = Tempfile.new("bit_array_file.dat")
    ba.dump(@file)
    @file.rewind
  end

  def teardown
    @file.close
    @file.unlink
  end

  def test_from_filename
    baf = BitArrayFile.new(filename: @file.path)
    assert_expected_bits(baf)
  ensure
    baf&.close
  end

  def test_from_io
    assert_expected_bits(BitArrayFile.new(io: @file))
  end

  private

  # Checks every position against the fixture's list of set bits.
  def assert_expected_bits(baf)
    (0...SIZE).each do |i|
      assert_equal(SET_BITS.include?(i) ? 1 : 0, baf[i])
    end
  end
end

class TestBitArrayFile < Minitest::Test
  include BitArrayFileFixture

  private

  def reverse_byte
    true
  end
end

class TestBitArrayFileWhenNonReversedByte < Minitest::Test
  include BitArrayFileFixture

  private

  def reverse_byte
    false
  end
end