mikel · sebbASF · Feb 22, 2023 · Feb 22, 2023 · Feb 22, 2023 · Feb 23, 2023
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -32,14 +32,14 @@ jobs:
            timeout: 50
          - ruby: truffleruby-head
            timeout: 50
-         - ruby: jruby
-           timeout: 5
-         - ruby: jruby-head
-           timeout: 5
-         - ruby: jruby-9.4
-           timeout: 5
-         - ruby: jruby-9.3
-           timeout: 5
+        #  - ruby: jruby
+        #    timeout: 5
+        #  - ruby: jruby-head
+        #    timeout: 5
+        #  - ruby: jruby-9.4
+        #    timeout: 5
+        #  - ruby: jruby-9.3
+        #    timeout: 5
          - ruby: jruby-9.2
            timeout: 5
     steps:

diff --git a/lib/mail/fields/unstructured_field.rb b/lib/mail/fields/unstructured_field.rb
@@ -98,37 +98,52 @@ def wrap_lines(name, folded_lines)
       result.join("\r\n\s")
     end
 
+    # folding strategy:
+    # The line is split into words separated by space or tab.
+    # If the line contains any non-ascii characters, or any words are expected to be too long to fit,
+    # then they are additionally split, and the line is marked for encoding
+    # 
+    # The list of individual words is then used to fill up the output lines without overflowing.
+    # This is not always guaranteed to work, because there is a wide variation in the number of
+    # characters that are needed to encode a given character. If the resulting line would be too
+    # long, divide the original word into two chunks and add the pieces separately.
     def fold(prepend = 0) # :nodoc:
+      #  prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
       encoding       = normalized_encoding
+      encoding_overhead = "=?#{encoding}?Q??=".length
+      # The encoded string goes here      ^ (between the ??)
+      max_safe_word = 78 - encoding_overhead - prepend # allow for encoding overhead + prefix
       decoded_string = decoded.to_s
-      should_encode  = !decoded_string.ascii_only?
+      words = decoded_string.split(/[ \t]/)
+      should_encode  = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}
+      encoding_overhead = 0 unless should_encode
       if should_encode
+        max_safe_re = Regexp.new(".{#{max_safe_word}}|.+$")
         first = true
-        words = decoded_string.split(/[ \t]/).map do |word|
+        words = words.map do |word|
           if first
             first = !first
           else
             word = " #{word}"
           end
-          if !word.ascii_only?
-            word
+          if word.ascii_only?
+            word.scan(max_safe_re)
           else
             word.scan(/.{7}|.+$/)
           end
         end.flatten
-      else
-        words = decoded_string.split(/[ \t]/)
       end
 
       folded_lines   = []
       while !words.empty?
-        limit = 78 - prepend
-        limit = limit - 7 - encoding.length if should_encode
+        limit = 78 - prepend - encoding_overhead
         line = String.new
         first_word = true
         while !words.empty?
           break unless word = words.first.dup
 
+          original_word = word # in case we need to try again
+
           # Convert on 1.9+ only since we aren't sure of the current
           # charset encoding on 1.8. We'd need to track internal/external
           # charset on each field.
@@ -140,23 +155,32 @@ def fold(prepend = 0) # :nodoc:
           word = encode_crlf(word)
           # Skip to next line if we're going to go past the limit
           # Unless this is the first word, in which case we're going to add it anyway
-          # Note: This means that a word that's longer than 998 characters is going to break the spec. Please fix if this is a problem for you.
-          # (The fix, it seems, would be to use encoded-word encoding on it, because that way you can break it across multiple lines and
-          # the linebreak will be ignored)
-          break if !line.empty? && (line.length + word.length + 1 > limit)
+          break if !line.empty? && (line.length + word.length >= limit)
           # Remove the word from the queue ...
           words.shift
           # Add word separator
           if first_word
             first_word = false
           else
-            line << " " if !should_encode
+            line << " " unless should_encode
           end
 
           # ... add it in encoded form to the current line
+          #  but first check if we have overflowed
+          # should only happen for the first word on a line
+          if should_encode && (line.length + word.length > limit)
+            word, remain = original_word.scan(/.{3}|.+$/) # roughly half the original split
+            words.unshift remain # put the unused bit back
+            # re-encode shorter word
+            if charset && word.respond_to?(:encoding)
+              word = Encodings.transcode_charset(word, word.encoding, charset)
+            end
+            word = encode(word)
+            word = encode_crlf(word)  
+          end
           line << word
         end
-        # Encode the line if necessary
+        # Mark the line as encoded if necessary
         line = "=?#{encoding}?Q?#{line}?=" if should_encode
         # Add the line to the output and reset the prepend
         folded_lines << line

diff --git a/spec/mail/encodings_spec.rb b/spec/mail/encodings_spec.rb
@@ -283,18 +283,20 @@
       expect(Mail::Encodings.value_decode(string)).to eq result
     end
 
-    it "should not fold a long string that has no spaces" do
+    it "should fold a long non-ascii string that has no spaces" do
       original = "ВосстановлениеВосстановлениеВашегопароля"
       if original.respond_to?(:force_encoding)
         original = original.dup.force_encoding('UTF-8')
-        result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
-      else
-        result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
       end
+      result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
       mail = Mail.new
       mail.subject = original
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result
     end
 
     it "should round trip a complex string properly" do
@@ -306,13 +308,27 @@
       mail = Mail.new
       mail.subject = original
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
+
       mail = Mail.new(mail.encoded)
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
+
       mail = Mail.new(mail.encoded)
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
     end
 
     it "should round trip another complex string (koi-8)" do

diff --git a/spec/mail/fields/unstructured_field_spec.rb b/spec/mail/fields/unstructured_field_spec.rb
@@ -149,25 +149,66 @@
       @field = Mail::UnstructuredField.new("Subject", string)
       string = string.dup.force_encoding('UTF-8')
       result = "Subject: =?UTF-8?Q?This_is_=E3=81=82_really_long_string_This_is_=E3=81=82?=\r\n\s=?UTF-8?Q?_really_long_string_This_is_=E3=81=82_really_long_string_This_is?=\r\n\s=?UTF-8?Q?_=E3=81=82_really_long_string_This_is_=E3=81=82_really_long?=\r\n\s=?UTF-8?Q?_string?=\r\n"
-      expect(@field.encoded).to eq result
       expect(@field.decoded).to eq string
+      expect(@field.encoded).to eq result
     end
 
     it "should fold properly with my actual complicated header" do
       string = %|{"unique_args": {"mailing_id":147,"account_id":2}, "to": ["[email protected]"], "category": "mailing", "filters": {"domainkeys": {"settings": {"domain":1,"enable":1}}}, "sub": {"{{open_image_url}}": ["http://betaling.larspind.local/O/token/147/Mailing::FakeRecipient"], "{{name}}": ["[FIRST NAME]"], "{{signup_reminder}}": ["(her kommer til at stå hvornår folk har skrevet sig op ...)"], "{{unsubscribe_url}}": ["http://betaling.larspind.local/U/token/147/Mailing::FakeRecipient"], "{{email}}": ["[email protected]"], "{{link:308}}": ["http://betaling.larspind.local/L/308/0/Mailing::FakeRecipient"], "{{confirm_url}}": [""], "{{ref}}": ["[REF]"]}}|
       @field = Mail::UnstructuredField.new("X-SMTPAPI", string)
       string = string.dup.force_encoding('UTF-8')
       result = "X-SMTPAPI: =?UTF-8?Q?{=22unique=5Fargs=22:_{=22mailing=5Fid=22:147,=22a?=\r\n =?UTF-8?Q?ccount=5Fid=22:2},_=22to=22:_[[email protected]=22],_=22categ?=\r\n =?UTF-8?Q?ory=22:_=22mailing=22,_=22filters=22:_{=22domainkeys=22:_{=22sett?=\r\n =?UTF-8?Q?ings=22:_{=22domain=22:1,=22enable=22:1}}},_=22sub=22:_{=22{{op?=\r\n =?UTF-8?Q?en=5Fimage=5Furl}}=22:_[=22http://betaling.larspind.local/O?=\r\n =?UTF-8?Q?/token/147/Mailing::FakeRecipient=22],_=22{{name}}=22:_[=22[FIRST?=\r\n =?UTF-8?Q?_NAME]=22],_=22{{signup=5Freminder}}=22:_[=22=28her_kommer_til_at?=\r\n =?UTF-8?Q?_st=C3=A5_hvorn=C3=A5r_folk_har_skrevet_sig_op_...=29=22],?=\r\n =?UTF-8?Q?_=22{{unsubscribe=5Furl}}=22:_[=22http://betaling.larspind.?=\r\n =?UTF-8?Q?local/U/token/147/Mailing::FakeRecipient=22],_=22{{email}}=22:?=\r\n =?UTF-8?Q?_[[email protected]=22],_=22{{link:308}}=22:_[=22http://beta?=\r\n =?UTF-8?Q?ling.larspind.local/L/308/0/Mailing::FakeRecipient=22],_=22{{con?=\r\n =?UTF-8?Q?firm=5Furl}}=22:_[=22=22],_=22{{ref}}=22:_[=22[REF]=22]}}?=\r\n"
-      expect(@field.encoded).to eq result
       expect(@field.decoded).to eq string
+      encoded = @field.encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
     end
 
     it "should fold properly with continuous spaces around the linebreak" do
-      @field = Mail::UnstructuredField.new("Subject", "This is a header that has continuous spaces around line break point,     which should be folded properly")
+      subject = "This is a header that has continuous spaces around line break point,     which should be folded properly"
+      @field = Mail::UnstructuredField.new("Subject", subject)
       result = "Subject: This is a header that has continuous spaces around line break point,\s\r\n\s\s\s\swhich should be folded properly\r\n"
+      expect(@field.decoded).to eq subject
       expect(@field.encoded).to eq result
     end
 
+    it "should fold an ASCII-only subject with more than 78 characters and no white space" do
+      value = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
+      expect(value.length).to be > 78 - "Subject: ".length
+      @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
+    end
+
+    it "should fold an ASCII-only subject with more than 998 characters and no white space" do
+      value = "ThisIsASubjectHeaderMessageThatIsGoingToBeMoreThan998CharactersLong." * 20
+      @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
+    end
+
+    # TODO: tweak unstructured_field to always generate lines of at most 78 chars
+    it "should fold a Japanese subject with more than 998 characters long and no white space" do
+      value = "これは非常に長い日本語のSubjectです。空白はありません。" * 1
+      @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
+    end
+
+    # TODO: tweak unstructured_field to always generate lines of at most 78 chars
+    it "should fold full of emoji subject that is going to be more than 998 bytes unfolded" do
+      value = "😄" * 90
+      @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
+    end
+
   end
 
   describe "encoding non QP safe chars" do
@@ -179,9 +220,13 @@
   end
 
   describe "iso-2022-jp Subject" do
-    it "should encoded with ISO-2022-JP encoding" do
-      @field = Mail::UnstructuredField.new("Subject", "あいうえお")
+    it "should be encoded with ISO-2022-JP encoding" do
+      value = "あいうえお"
+      @field = Mail::UnstructuredField.new("Subject", value)
       @field.charset = 'iso-2022-jp'
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
       expect(@field.encoded).to eq "Subject: =?ISO-2022-JP?Q?=1B$B$=22$$$&$=28$*=1B=28B?=\r\n"
     end
   end