Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial stab at improving folding #1570

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ jobs:
timeout: 50
- ruby: truffleruby-head
timeout: 50
- ruby: jruby
timeout: 5
- ruby: jruby-head
timeout: 5
- ruby: jruby-9.4
timeout: 5
- ruby: jruby-9.3
timeout: 5
# - ruby: jruby
# timeout: 5
# - ruby: jruby-head
# timeout: 5
# - ruby: jruby-9.4
# timeout: 5
# - ruby: jruby-9.3
# timeout: 5
- ruby: jruby-9.2
timeout: 5
steps:
Expand Down
52 changes: 38 additions & 14 deletions lib/mail/fields/unstructured_field.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,37 +98,52 @@ def wrap_lines(name, folded_lines)
result.join("\r\n\s")
end

# folding strategy:
# The line is split into words separated by space or tab.
# If the line contains any non-ascii characters, or any words are expected to be too long to fit,
# then they are additionally split, and the line is marked for encoding
#
# The list of individual words is then used to fill up the output lines without overflowing.
# This is not always guaranteed to work, because there is a wide variation in the number of
# characters that are needed to encode a given character. If the resulting line would be too
# long, divide the original word into two chunks and add the pieces separately.
def fold(prepend = 0) # :nodoc:
# prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
encoding = normalized_encoding
encoding_overhead = "=?#{encoding}?Q??=".length
# The encoded string goes here ^ (between the ??)
max_safe_word = 78 - encoding_overhead - prepend # allow for encoding overhead + prefix
decoded_string = decoded.to_s
should_encode = !decoded_string.ascii_only?
words = decoded_string.split(/[ \t]/)
should_encode = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}
encoding_overhead = 0 unless should_encode
if should_encode
max_safe_re = Regexp.new(".{#{max_safe_word}}|.+$")
first = true
words = decoded_string.split(/[ \t]/).map do |word|
words = words.map do |word|
if first
first = !first
else
word = " #{word}"
end
if !word.ascii_only?
word
if word.ascii_only?
word.scan(max_safe_re)
else
word.scan(/.{7}|.+$/)
end
end.flatten
else
words = decoded_string.split(/[ \t]/)
end

folded_lines = []
while !words.empty?
limit = 78 - prepend
limit = limit - 7 - encoding.length if should_encode
limit = 78 - prepend - encoding_overhead
line = String.new
first_word = true
while !words.empty?
break unless word = words.first.dup

original_word = word # in case we need to try again

# Convert on 1.9+ only since we aren't sure of the current
# charset encoding on 1.8. We'd need to track internal/external
# charset on each field.
Expand All @@ -140,23 +155,32 @@ def fold(prepend = 0) # :nodoc:
word = encode_crlf(word)
# Skip to next line if we're going to go past the limit
# Unless this is the first word, in which case we're going to add it anyway
# Note: This means that a word that's longer than 998 characters is going to break the spec. Please fix if this is a problem for you.
# (The fix, it seems, would be to use encoded-word encoding on it, because that way you can break it across multiple lines and
# the linebreak will be ignored)
break if !line.empty? && (line.length + word.length + 1 > limit)
break if !line.empty? && (line.length + word.length >= limit)
# Remove the word from the queue ...
words.shift
# Add word separator
if first_word
first_word = false
else
line << " " if !should_encode
line << " " unless should_encode
end

# ... add it in encoded form to the current line
# but first check if we have overflowed
# should only happen for the first word on a line
if should_encode && (line.length + word.length > limit)
word, remain = original_word.scan(/.{3}|.+$/) # roughly half the original split
words.unshift remain # put the unused bit back
# re-encode shorter word
if charset && word.respond_to?(:encoding)
word = Encodings.transcode_charset(word, word.encoding, charset)
end
word = encode(word)
word = encode_crlf(word)
end
line << word
end
# Encode the line if necessary
# Mark the line as encoded if necessary
line = "=?#{encoding}?Q?#{line}?=" if should_encode
# Add the line to the output and reset the prepend
folded_lines << line
Expand Down
32 changes: 24 additions & 8 deletions spec/mail/encodings_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -283,18 +283,20 @@
expect(Mail::Encodings.value_decode(string)).to eq result
end

it "should not fold a long string that has no spaces" do
it "should fold a long non-ascii string that has no spaces" do
original = "ВосстановлениеВосстановлениеВашегопароля"
if original.respond_to?(:force_encoding)
original = original.dup.force_encoding('UTF-8')
result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
else
result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
end
result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
mail = Mail.new
mail.subject = original
expect(mail[:subject].decoded).to eq original
expect(mail[:subject].encoded).to eq result
encoded = mail[:subject].encoded
# check lines are not too long
encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
# check expected contents, ignoring folding
expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result
end

it "should round trip a complex string properly" do
Expand All @@ -306,13 +308,27 @@
mail = Mail.new
mail.subject = original
expect(mail[:subject].decoded).to eq original
expect(mail[:subject].encoded).to eq result
encoded = mail[:subject].encoded
# check lines are not too long
encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
# check expected contents, ignoring folding
expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')

mail = Mail.new(mail.encoded)
expect(mail[:subject].decoded).to eq original
expect(mail[:subject].encoded).to eq result
encoded = mail[:subject].encoded
# check lines are not too long
encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
# check expected contents, ignoring folding
expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')

mail = Mail.new(mail.encoded)
expect(mail[:subject].decoded).to eq original
expect(mail[:subject].encoded).to eq result
encoded = mail[:subject].encoded
# check lines are not too long
encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
# check expected contents, ignoring folding
expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
end

it "should round trip another complex string (koi-8)" do
Expand Down
55 changes: 50 additions & 5 deletions spec/mail/fields/unstructured_field_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,25 +149,66 @@
@field = Mail::UnstructuredField.new("Subject", string)
string = string.dup.force_encoding('UTF-8')
result = "Subject: =?UTF-8?Q?This_is_=E3=81=82_really_long_string_This_is_=E3=81=82?=\r\n\s=?UTF-8?Q?_really_long_string_This_is_=E3=81=82_really_long_string_This_is?=\r\n\s=?UTF-8?Q?_=E3=81=82_really_long_string_This_is_=E3=81=82_really_long?=\r\n\s=?UTF-8?Q?_string?=\r\n"
expect(@field.encoded).to eq result
expect(@field.decoded).to eq string
expect(@field.encoded).to eq result
end

it "should fold properly with my actual complicated header" do
string = %|{"unique_args": {"mailing_id":147,"account_id":2}, "to": ["[email protected]"], "category": "mailing", "filters": {"domainkeys": {"settings": {"domain":1,"enable":1}}}, "sub": {"{{open_image_url}}": ["http://betaling.larspind.local/O/token/147/Mailing::FakeRecipient"], "{{name}}": ["[FIRST NAME]"], "{{signup_reminder}}": ["(her kommer til at stå hvornår folk har skrevet sig op ...)"], "{{unsubscribe_url}}": ["http://betaling.larspind.local/U/token/147/Mailing::FakeRecipient"], "{{email}}": ["[email protected]"], "{{link:308}}": ["http://betaling.larspind.local/L/308/0/Mailing::FakeRecipient"], "{{confirm_url}}": [""], "{{ref}}": ["[REF]"]}}|
@field = Mail::UnstructuredField.new("X-SMTPAPI", string)
string = string.dup.force_encoding('UTF-8')
result = "X-SMTPAPI: =?UTF-8?Q?{=22unique=5Fargs=22:_{=22mailing=5Fid=22:147,=22a?=\r\n =?UTF-8?Q?ccount=5Fid=22:2},_=22to=22:_[[email protected]=22],_=22categ?=\r\n =?UTF-8?Q?ory=22:_=22mailing=22,_=22filters=22:_{=22domainkeys=22:_{=22sett?=\r\n =?UTF-8?Q?ings=22:_{=22domain=22:1,=22enable=22:1}}},_=22sub=22:_{=22{{op?=\r\n =?UTF-8?Q?en=5Fimage=5Furl}}=22:_[=22http://betaling.larspind.local/O?=\r\n =?UTF-8?Q?/token/147/Mailing::FakeRecipient=22],_=22{{name}}=22:_[=22[FIRST?=\r\n =?UTF-8?Q?_NAME]=22],_=22{{signup=5Freminder}}=22:_[=22=28her_kommer_til_at?=\r\n =?UTF-8?Q?_st=C3=A5_hvorn=C3=A5r_folk_har_skrevet_sig_op_...=29=22],?=\r\n =?UTF-8?Q?_=22{{unsubscribe=5Furl}}=22:_[=22http://betaling.larspind.?=\r\n =?UTF-8?Q?local/U/token/147/Mailing::FakeRecipient=22],_=22{{email}}=22:?=\r\n =?UTF-8?Q?_[[email protected]=22],_=22{{link:308}}=22:_[=22http://beta?=\r\n =?UTF-8?Q?ling.larspind.local/L/308/0/Mailing::FakeRecipient=22],_=22{{con?=\r\n =?UTF-8?Q?firm=5Furl}}=22:_[=22=22],_=22{{ref}}=22:_[=22[REF]=22]}}?=\r\n"
expect(@field.encoded).to eq result
expect(@field.decoded).to eq string
encoded = @field.encoded
# check lines are not too long
encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
# check expected contents, ignoring folding
expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
end

it "should fold properly with continuous spaces around the linebreak" do
@field = Mail::UnstructuredField.new("Subject", "This is a header that has continuous spaces around line break point, which should be folded properly")
subject = "This is a header that has continuous spaces around line break point, which should be folded properly"
@field = Mail::UnstructuredField.new("Subject", subject)
result = "Subject: This is a header that has continuous spaces around line break point,\s\r\n\s\s\s\swhich should be folded properly\r\n"
expect(@field.decoded).to eq subject
expect(@field.encoded).to eq result
end

it "should fold an ASCII-only subject with more than 78 characters and no white space" do
value = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
expect(value.length).to be > 78 - "Subject: ".length
@field = Mail::UnstructuredField.new("Subject", value)
expect(@field.decoded).to eq value
lines = @field.encoded.split("\r\n")
lines.each { |line| expect(line.length).to be <= 78 }
end

it "should fold an ASCII-only subject with more than 998 characters and no white space" do
value = "ThisIsASubjectHeaderMessageThatIsGoingToBeMoreThan998CharactersLong." * 20
@field = Mail::UnstructuredField.new("Subject", value)
expect(@field.decoded).to eq value
lines = @field.encoded.split("\r\n")
lines.each { |line| expect(line.length).to be <= 78 }
end

# TODO: tweak unstructured_field to always generate lines of at most 78 chars
it "should fold a Japanese subject with more than 998 characters long and no white space" do
value = "これは非常に長い日本語のSubjectです。空白はありません。" * 1
@field = Mail::UnstructuredField.new("Subject", value)
expect(@field.decoded).to eq value
lines = @field.encoded.split("\r\n")
lines.each { |line| expect(line.length).to be <= 78 }
end

# TODO: tweak unstructured_field to always generate lines of at most 78 chars
it "should fold full of emoji subject that is going to be more than 998 bytes unfolded" do
value = "😄" * 90
@field = Mail::UnstructuredField.new("Subject", value)
expect(@field.decoded).to eq value
lines = @field.encoded.split("\r\n")
lines.each { |line| expect(line.length).to be <= 78 }
end

end

describe "encoding non QP safe chars" do
Expand All @@ -179,9 +220,13 @@
end

describe "iso-2022-jp Subject" do
it "should encoded with ISO-2022-JP encoding" do
@field = Mail::UnstructuredField.new("Subject", "あいうえお")
it "should be encoded with ISO-2022-JP encoding" do
value = "あいうえお"
@field = Mail::UnstructuredField.new("Subject", value)
@field.charset = 'iso-2022-jp'
expect(@field.decoded).to eq value
lines = @field.encoded.split("\r\n")
lines.each { |line| expect(line.length).to be <= 78 }
expect(@field.encoded).to eq "Subject: =?ISO-2022-JP?Q?=1B$B$=22$$$&$=28$*=1B=28B?=\r\n"
end
end
Expand Down