From 8ba80de3a3f75c4be2a2bae24ea4921a37155868 Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Fri, 2 Aug 2019 21:42:26 +0000 Subject: [PATCH] enable force-decoding with `decode_gzip => force` --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 12 ++++++++++++ lib/logstash/inputs/s3.rb | 11 ++++++++++- logstash-input-s3.gemspec | 2 +- spec/inputs/s3_spec.rb | 7 +++++++ 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de57c3e..e0424cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 3.5.0 + - Added ability to force gzip-decoding regardless of filename with `decode_gzip => force` + ## 3.4.1 - Fixed link formatting for input type (documentation) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 189a8e9..db318ab 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -140,6 +140,18 @@ The name of the S3 bucket. Whether to delete processed files from the original bucket. +[id="plugins-{type}s-{plugin}-decode_gzip"] +===== `decode_gzip` + + * Value type is <<string,string>> + * Acceptable values are `detect` and `force` + * Default value is `detect` + +Controls behaviour for performing gzip-decoding on files. + + * `detect` will perform gzip-decoding on files with matching extensions (`.gz` and `.gzip`) + * `force` will perform gzip-decoding on all files, regardless of extension or contents. + [id="plugins-{type}s-{plugin}-endpoint"] ===== `endpoint` diff --git a/lib/logstash/inputs/s3.rb b/lib/logstash/inputs/s3.rb index 23d458c..f085cf7 100644 --- a/lib/logstash/inputs/s3.rb +++ b/lib/logstash/inputs/s3.rb @@ -79,6 +79,9 @@ class LogStash::Inputs::S3 < LogStash::Inputs::Base # be present. config :include_object_properties, :validate => :boolean, :default => false + # Controls which files are handled as gzip. 
+ config :decode_gzip, :validate => %w(detect force), :default => 'detect' + public def register require "fileutils" @@ -109,6 +112,12 @@ def register if !@watch_for_new_files && original_params.include?('interval') logger.warn("`watch_for_new_files` has been disabled; `interval` directive will be ignored.") end + + @gzip_detector = case @decode_gzip + when "force" then -> (_) { true } + when "detect" then -> (filename) { filename.end_with?('.gz','.gzip') } + else fail(LogStash::ConfigurationError, "unsupported value `#{@decode_gzip}` for decode_gzip") + end end public @@ -315,7 +324,7 @@ def read_gzip_file(filename, block) private def gzip?(filename) - filename.end_with?('.gz','.gzip') + @gzip_detector.call(filename) end private diff --git a/logstash-input-s3.gemspec b/logstash-input-s3.gemspec index ff3e382..efcf4e7 100644 --- a/logstash-input-s3.gemspec +++ b/logstash-input-s3.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-s3' - s.version = '3.4.1' + s.version = '3.5.0' s.licenses = ['Apache-2.0'] s.summary = "Streams events from files in a S3 bucket" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" diff --git a/spec/inputs/s3_spec.rb b/spec/inputs/s3_spec.rb index cd97868..71f7b35 100644 --- a/spec/inputs/s3_spec.rb +++ b/spec/inputs/s3_spec.rb @@ -428,6 +428,13 @@ include_examples "generated events" end + context 'force gzip decoding' do + let(:config) { super().merge('decode_gzip' => 'force') } + let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gz') } + + include_examples "generated events" + end + context 'compressed with gzip extension' do let(:log) { double(:key => 'log.gz', :last_modified => Time.now - 2 * day, :content_length => 5, :storage_class => 'STANDARD') } let(:log_file) { File.join(File.dirname(__FILE__), '..', 'fixtures', 'compressed.log.gzip') }