Skip to content

Commit

Permalink
Merge pull request #282 from puppetlabs/bug-retry_on_invokedsc_collision
Browse files Browse the repository at this point in the history
(feat) - add retries on failed dsc invocation
  • Loading branch information
david22swan authored Jan 31, 2024
2 parents 36ed427 + 3e97cbf commit a74bd25
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 8 deletions.
14 changes: 7 additions & 7 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2024-01-23 11:29:18 UTC using RuboCop version 1.50.2.
# on 2024-01-30 16:36:55 UTC using RuboCop version 1.50.2.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
Expand Down Expand Up @@ -46,14 +46,14 @@ Metrics/BlockLength:
# Offense count: 2
# Configuration parameters: CountComments, CountAsOne.
Metrics/ClassLength:
Max: 526
Max: 553

# Offense count: 12
# Configuration parameters: AllowedMethods, AllowedPatterns.
Metrics/CyclomaticComplexity:
Max: 24

# Offense count: 22
# Offense count: 23
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
Metrics/MethodLength:
Max: 42
Expand Down Expand Up @@ -118,12 +118,12 @@ RSpec/DescribeClass:
- 'spec/acceptance/dsc/complex.rb'
- 'spec/unit/pwsh_spec.rb'

# Offense count: 31
# Offense count: 32
# Configuration parameters: CountAsOne.
RSpec/ExampleLength:
Max: 70

# Offense count: 102
# Offense count: 105
# Configuration parameters: .
# SupportedStyles: have_received, receive
RSpec/MessageSpies:
Expand All @@ -134,7 +134,7 @@ RSpec/MultipleDescribes:
Exclude:
- 'spec/unit/pwsh_spec.rb'

# Offense count: 147
# Offense count: 151
RSpec/MultipleExpectations:
Max: 15

Expand All @@ -156,7 +156,7 @@ RSpec/NoExpectationExample:
- 'spec/unit/pwsh/windows_powershell_spec.rb'
- 'spec/unit/pwsh_spec.rb'

# Offense count: 55
# Offense count: 56
RSpec/StubbedMock:
Exclude:
- 'spec/unit/puppet/provider/dsc_base_provider/dsc_base_provider_spec.rb'
Expand Down
47 changes: 46 additions & 1 deletion lib/puppet/provider/dsc_base_provider/dsc_base_provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
require 'pathname'
require 'json'

class Puppet::Provider::DscBaseProvider
class Puppet::Provider::DscBaseProvider # rubocop:disable Metrics/ClassLength
# Initializes the provider, preparing the instance variables which cache:
# - the canonicalized resources across calls
# - query results
Expand Down Expand Up @@ -244,6 +244,7 @@ def invoke_dsc_resource(context, name_hash, props, method)
script_content = ps_script_content(resource)
context.debug("Script:\n #{redact_secrets(script_content)}")
output = ps_manager.execute(remove_secret_identifiers(script_content))[:stdout]

if output.nil?
context.err('Nothing returned')
return nil
Expand All @@ -256,8 +257,10 @@ def invoke_dsc_resource(context, name_hash, props, method)
return nil
end
context.debug("raw data received: #{data.inspect}")
collision_error_matcher = /The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked/

error = data['errormessage']

unless error.nil? || error.empty?
# NB: We should have a way to stop processing this resource *now* without blowing up the whole Puppet run
# Raising an error stops processing but blows things up while context.err alerts but continues to process
Expand All @@ -267,6 +270,11 @@ def invoke_dsc_resource(context, name_hash, props, method)
@logon_failures << name_hash[:dsc_psdscrunascredential].dup
# This is a hack to handle the query cache to prevent a second lookup
@cached_query_results << name_hash # if fetch_cached_hashes(@cached_query_results, [data]).empty?
elsif error.match?(collision_error_matcher)
context.notice('Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour.')
retry_invoke_dsc_resource(context, 5, 60, collision_error_matcher) do
data = ps_manager.execute(remove_secret_identifiers(script_content))[:stdout]
end
else
context.err(error)
end
Expand All @@ -276,6 +284,43 @@ def invoke_dsc_resource(context, name_hash, props, method)
data
end

# Retries Invoke-DscResource when returned error matches error regex supplied as param.
# @param context [Object] the Puppet runtime context to operate in and send feedback to
# @param max_retry_count [Int] max number of times to retry Invoke-DscResource
# @param retry_wait_interval_secs [Int] Time delay between retries
# @param error_matcher [String] the regex pattern to match with error
def retry_invoke_dsc_resource(context, max_retry_count, retry_wait_interval_secs, error_matcher)
try = 0
while try < max_retry_count
try += 1
# notify and wait for retry interval
context.notice("Sleeping for #{retry_wait_interval_secs} seconds.")
sleep retry_wait_interval_secs
# notify and retry
context.notice("Retrying: attempt #{try} of #{max_retry_count}.")
data = JSON.parse(yield)
# if no error, break
if data['errormessage'].nil?
break
# check if error matches error matcher supplied
elsif data['errormessage'].match?(error_matcher)
# if last attempt, return error
if try == max_retry_count
context.notice("Attempt #{try} of #{max_retry_count} failed. No more retries.")
# all attempts failed, raise error
return context.err(data['errormessage'])
end
# if not last attempt, notify, continue and retry
context.notice("Attempt #{try} of #{max_retry_count} failed.")
next
else
# if we get an unexpected error, return
return context.err(data['errormessage'])
end
end
data
end

# Determine if the DSC Resource is in the desired state, invoking the `Test` method unless it's
# already been run for the resource, in which case reuse the result instead of checking for each
# property. This behavior is only triggered if the validation_mode is set to resource; by default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,44 @@
end
end

context 'when the invocation script errors with a collision' do
it 'writes a notice via context and applies successfully on retry' do
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' })
expect(context).to receive(:notice).with(/Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour./).once
expect(context).to receive(:notice).with('Sleeping for 60 seconds.').twice
expect(context).to receive(:notice).with(/Retrying: attempt [1-2] of 5/).twice
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' })
expect(context).to receive(:notice).with('Attempt 1 of 5 failed.')
allow(provider).to receive(:sleep)
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": null}' })
expect { result }.not_to raise_error
end

it 'writes a error via context and fails to apply when all retry attempts used' do
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' })
.exactly(5).times
expect(context).to receive(:notice).with(/Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour./).once
expect(context).to receive(:notice).with('Sleeping for 60 seconds.').exactly(5).times
expect(context).to receive(:notice).with(/Retrying: attempt [1-6] of 5/).exactly(5).times
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' })
expect(context).to receive(:notice).with(/Attempt [1-6] of 5 failed/).exactly(5).times
expect(context).to receive(:err).with(/The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked/)
allow(provider).to receive(:sleep)
expect(result).to be_nil
end

it 'writes an error via context and fails to apply when encountering an unexpected error' do
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "The Invoke-DscResource cmdlet is in progress and must return before Invoke-DscResource can be invoked"}' })
expect(context).to receive(:notice).with(/Invoke-DscResource collision detected: Please stagger the timing of your Puppet runs as this can lead to unexpected behaviour./).once
expect(context).to receive(:notice).with('Sleeping for 60 seconds.').once
expect(context).to receive(:notice).with(/Retrying: attempt 1 of 5/).once
allow(provider).to receive(:sleep)
expect(ps_manager).to receive(:execute).and_return({ stdout: '{"errormessage": "Some unexpected error"}' }).once
expect(context).to receive(:err).with(/Some unexpected error/)
expect(result).to be_nil
end
end

context 'when the invocation script returns data without errors' do
it 'filters for the correct properties to invoke and returns the results' do
expect(ps_manager).to receive(:execute).with("Script: #{apply_props}").and_return({ stdout: '{"in_desired_state": true, "errormessage": null}' })
Expand Down

0 comments on commit a74bd25

Please sign in to comment.