Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/lazy load encode improvements #21

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/estratto/content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
module Estratto
class Content
def self.for(file_path)
content = File.read(file_path)
content = File.open(file_path)
encoded_content = Encoder.new(content).encode
encoded_content.split("\n")
encoded_content
end
end
end
9 changes: 6 additions & 3 deletions lib/estratto/encoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ def initialize(content)
end

def encode
CharlockHolmes::Converter.convert(content, encoding, 'UTF-8')
content.lazy.map do |line|
charset = detect(line)
CharlockHolmes::Converter.convert(line, charset[:encoding], 'UTF-8').chomp
end
end

private

def encoding
CharlockHolmes::EncodingDetector.detect(content)[:encoding]
def detect(line)
CharlockHolmes::EncodingDetector.detect(line)
end
end
end
22 changes: 22 additions & 0 deletions lib/estratto/helpers/register_enumerator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
require_relative '../register'

module Estratto
  module Helpers
    # Lazy enumerator that walks raw content line by line and produces the
    # refined register for every line whose prefix is known to the layout.
    #
    # Lines for which `layout.register_fields_for` returns nil are skipped.
    # Nothing is read from `raw_content` until the enumerator is consumed.
    class RegisterEnumerator < Enumerator::Lazy
      # @param raw_content [#each_with_index] source of lines; each line must
      #   support `[]` with `layout.prefix_range` (presumably Strings; verify
      #   against the caller).
      # @param layout [#prefix_range, #register_fields_for] layout used to
      #   pick the register fields for each line prefix.
      # @yield [register] when a block is given, each refined register is
      #   passed to that block during enumeration INSTEAD of being emitted by
      #   the enumerator itself.
      def initialize(raw_content, layout)
        super(raw_content.each_with_index) do |yielder, *values|
          # The (line, index) pair reaches this block either packed into a
          # single array (lazy sources) or as two separate values (plain
          # Enumerable#each_with_index). Accept both so the index is never
          # silently lost.
          line, index = values.size == 1 ? values.first : values

          register_layout = layout.register_fields_for(line[layout.prefix_range])
          next if register_layout.nil?

          register = Register.new(line, index, register_layout).refine

          if block_given?
            yield register
          else
            yielder << register
          end
        end
      end
    end
  end
end
8 changes: 2 additions & 6 deletions lib/estratto/parser.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
require_relative 'register'
require_relative 'content'
require_relative 'helpers/register_enumerator'

module Estratto
class Parser
Expand All @@ -11,11 +11,7 @@ def initialize(file_path, layout)
end

def perform
@data ||= raw_content.map.with_index do |line, index|
register_layout = layout.register_fields_for(line[layout.prefix_range])
next if register_layout.nil?
Register.new(line, index, register_layout).refine
end.compact
@data ||= Helpers::RegisterEnumerator.new(raw_content, layout)
end

def raw_content
Expand Down
9 changes: 7 additions & 2 deletions spec/lib/estratto/content_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@
"025000020181225JUSTDESCRIPTION"
]
end
it do
expect(described_class.for(file_path)).to eq(expected_content)

it "returns an Enumerator::Lazy object" do
expect(described_class.for(file_path)).to be_a Enumerator::Lazy
end

it "returns the right content" do
expect(described_class.for(file_path).to_a).to match_array expected_content
end
end
end
9 changes: 7 additions & 2 deletions spec/lib/estratto/document_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@
}
]
end
it do
expect(subject.process(file: file_path, layout: layout_path)).to eq(data_refined)

it "returns an Enumerator::Lazy object" do
expect(subject.process(file: file_path, layout: layout_path)).to be_a Enumerator::Lazy
end

it "returns the right content" do
expect(subject.process(file: file_path, layout: layout_path).to_a).to match_array data_refined
end
end
end
27 changes: 16 additions & 11 deletions spec/lib/estratto/encoder_spec.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
RSpec.describe Estratto::Encoder do
subject { described_class.new(original_content) }
let(:instance) { described_class.new(original_content) }

context 'ISO-8859-1 original encoding' do
let(:original_content) { "026000020181227DESCRI\xC7\xC3O\n" }
let(:encoded_content) { "026000020181227DESCRIÇÃO\n" }
describe "#encode" do
subject { instance.encode }

it 'converts the original content to UTF-8' do
expect(subject.encode).to eq(encoded_content)
context 'ISO-8859-1 original encoding' do
let(:original_content) { ["026000020181227DESCRI\xC7\xC3O\n"] }
let(:encoded_content) { ["026000020181227DESCRIÇÃO"] }

it 'converts the original content to UTF-8' do
expect(subject.to_a).to match_array encoded_content
end
end
end

context 'UTF-8 original encoding' do
let(:original_content) { "025000020181225JUSTDESCRIPTION\n" }
context 'UTF-8 original encoding' do
let(:original_content) { ["025000020181225JUSTDESCRIPTION\n"] }
let(:encoded_content) { ["025000020181225JUSTDESCRIPTION"] }

it 'keeps the original content' do
expect(subject.encode).to eq(original_content)
it 'keeps the original content' do
expect(subject.to_a).to match_array encoded_content
end
end
end
end
51 changes: 51 additions & 0 deletions spec/lib/estratto/helpers/register_enumerator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Specs for Estratto::Helpers::RegisterEnumerator, built from the fixture
# data file and the fixture layout referenced below.
RSpec.describe Estratto::Helpers::RegisterEnumerator do
subject { instance }

# Enumerator under test: content loaded through Estratto::Content plus a
# layout fabricated from the YAML fixture.
let(:instance) { described_class.new(content, layout) }
let(:content) { Estratto::Content.for(file_path) }
let(:layout) { Estratto::Layout::Factory.fabricate(layout_path) }
let(:file_path) { 'spec/fixtures/files/data_to_parse.txt' }
let(:layout_path) { 'spec/fixtures/files/complete_layout.yml' }

# Expected refined registers, in the order the examples below expect them.
let(:john_doe) do
{
name: 'JOHN DOE',
gifted_at: Date.new(2018, 12, 25),
value: 375.95,
buyed_times: 123
}
end
let(:mary_jones) do
{
name: 'MARY JONES',
gifted_at: Date.new(2018, 12, 25),
value: 500.0,
buyed_times: 12345
}
end
let(:description) do
{
value: 500.0,
generated_at: Date.new(2018, 12, 25),
description: 'JUSTDESCRIPTION'
}
end

# The enumerator must remain a lazy enumerator.
it { is_expected.to be_a Enumerator::Lazy }

# Forcing the enumerator returns exactly the three expected registers.
describe '#to_a' do
subject { instance.to_a }

it 'return only rows which prefix is mapped by the layout' do
is_expected.to eq [john_doe, mary_jones, description]
end
end

# Iterating with a block yields the same three registers, one at a time.
describe '#each' do
it "yields only Registers for rows which prefix is mapped by the layout" do
expect { |block| instance.each(&block) }.to(
yield_successive_args(john_doe, mary_jones, description)
)
end
end
end
18 changes: 14 additions & 4 deletions spec/lib/estratto/parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@
}
]
end
it do
expect(subject.perform).to eq(data_refined)

it "returns an Enumerator::Lazy object" do
expect(subject.perform).to be_a Estratto::Helpers::RegisterEnumerator
end

it "returns the right content" do
expect(subject.perform.to_a).to match_array data_refined
end
end

Expand All @@ -44,8 +49,13 @@
}
]
end
it do
expect(subject.perform).to eq(data_refined)

it "returns an Enumerator::Lazy object" do
expect(subject.perform).to be_a Enumerator::Lazy
end

it "returns the right content" do
expect(subject.perform.to_a).to match_array data_refined
end
end
end
Expand Down