Skip to content

Commit

Permalink
Apply a time limit to office-doc-to-pdf conversions
Browse files Browse the repository at this point in the history
  • Loading branch information
elohanlon committed Jul 18, 2024
1 parent 31df310 commit 025085c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
13 changes: 11 additions & 2 deletions app/models/concerns/derivativo/pdf_derivatives.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
module Derivativo::PdfDerivatives
extend ActiveSupport::Concern

# If an office conversion operation takes longer than this time, there may be something wrong.
# This timeout stops us from getting stuck on an bad office doc conversion that could run for hours.
SMALL_OFFICE_CONVERSION_DOC_TIMEOUT = 30.seconds
LARGE_OFFICE_CONVERSION_DOC_TIMEOUT = 120.seconds
LARGE_OFFICE_CONVERSION_DOC_THRESHOLD = 5.megabytes

def derivative_proc_for_output_path(out_path)
Proc.new do |in_path|
if in_path =~ /pdf$/i
Expand Down Expand Up @@ -87,7 +93,10 @@ def office_convert(in_path, out_path, soffice_path, office_export_format)
Shellwords.escape(tmp_outdir),
Shellwords.escape(in_path)
].join(' ')
system(conversion_command)
Derivativo::ShellCommand.run_with_timeout(
conversion_command,
File.size(in_path) < LARGE_OFFICE_CONVERSION_DOC_THRESHOLD ? SMALL_OFFICE_CONVERSION_DOC_TIMEOUT : LARGE_OFFICE_CONVERSION_DOC_TIMEOUT
)

# The office conversion process always keeps the original name of the file and replaces
# the extension with 'pdf', so we'll need to predict the tmp outpath and move it after.
Expand All @@ -110,7 +119,7 @@ def office_convert(in_path, out_path, soffice_path, office_export_format)

# Do the conversion, updating the in_path
in_path = recode_in_path
system(conversion_command)
Derivativo::ShellCommand.run_with_timeout(conversion_command, OFFICE_CONVERSION_TIMEOUT)
# After successful conversion, delete the recode_in_path file, since it was temporary
File.delete(recode_in_path)
end
Expand Down
34 changes: 34 additions & 0 deletions app/models/concerns/derivativo/shell_command.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Derivativo::ShellCommand
def self.run_with_timeout(cmd, timeout_in_seconds)
stdout_str = ''
stderr_str = ''
begin
read_out, write_out = IO.pipe
read_err, write_err = IO.pipe
pid = Process.spawn(cmd, pgroup: true, :out => write_out, :err => write_err)
Timeout.timeout(timeout_in_seconds) do
Process.waitpid(pid)

# close write ends so we can read from them
write_out.close
write_err.close

stdout_str = read_out.readlines.join
stderr_str = read_err.readlines.join
end
rescue Timeout::Error => e
Process.kill('KILL', pid)
Process.detach(pid)
# re-raise timeout error
raise e
ensure
write_out.close unless write_out.closed?
write_err.close unless write_err.closed?
# dispose the read ends of the pipes
read_out.close
read_err.close
end

[stdout_str, stderr_str]
end
end

0 comments on commit 025085c

Please sign in to comment.