Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Block extensions disallowed by policy #3259

Open
wants to merge 14 commits into
base: develop
Choose a base branch
from
78 changes: 76 additions & 2 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \
SupportedFeatureNames, get_supported_feature_by_name, get_agent_supported_features_list_for_crp
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.ga.policy.policy_engine import ExtensionPolicyEngine, ExtensionPolicyError
from azurelinuxagent.common.datacontract import get_properties, set_properties
from azurelinuxagent.common.errorstate import ErrorState
from azurelinuxagent.common.event import add_event, elapsed_milliseconds, WALAEventOperation, \
Expand Down Expand Up @@ -482,10 +483,50 @@ def handle_ext_handlers(self, goal_state_id):
depends_on_err_msg = None
extensions_enabled = conf.get_extensions_enabled()

# Instantiate policy engine, and use same engine to handle all extension handlers.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the check for 'if not extensions_enabled:' should be done before the checks for policy

# If an error is thrown during policy engine initialization, we block all extensions and report the error via handler/extension status for
# each extension.
policy_error = None
try:
policy_engine = ExtensionPolicyEngine()
except Exception as ex:
policy_error = ex

for extension, ext_handler in all_extensions:

handler_i = ExtHandlerInstance(ext_handler, self.protocol, extension=extension)

# Invoke policy engine to determine if extension is allowed. If not, block extension and report error on
# behalf of the extension.
policy_err_map = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like this is a constant... define it at the class level?

ExtensionRequestedState.Enabled: ('enable', ExtensionErrorCodes.PluginEnableProcessingFailed),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add a comment describing the elements in the tuple?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'enable' and 'disable' are internal CRP/Agent operations; users are not aware of them. They should not be propagated to error messages displayed to the user

# Note: currently, when uninstall is requested for an extension, CRP polls until the agent does not
# report status for that extension, or until timeout is reached. In the case of a policy error, the
# agent reports failed status on behalf of the extension, which will cause CRP to poll for the full
# timeout, instead of failing fast.
#
# TODO: CRP does not currently have a terminal error code for uninstall. Once this code is added, use
# it instead of PluginDisableProcessingFailed below.
ExtensionRequestedState.Uninstall: ('uninstall', ExtensionErrorCodes.PluginDisableProcessingFailed),
ExtensionRequestedState.Disabled: ('disable', ExtensionErrorCodes.PluginDisableProcessingFailed),
}
policy_op, policy_err_code = policy_err_map.get(ext_handler.state)
if policy_error is not None:
err = ExtensionPolicyError(msg="", inner=policy_error, code=policy_err_code)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the intention of creating an exception object here? seems like it is only used to pass the error code, but it is never raised

self.__handle_and_report_policy_error(handler_i, err, report_op=handler_i.operation, message=ustr(err),
extension=extension, report=True)
continue

extension_allowed = policy_engine.should_allow_extension(ext_handler.name)
if not extension_allowed:
msg = "failed to {0} extension '{1}' because extension is not specified in allowlist. To {0}, " \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
msg = "failed to {0} extension '{1}' because extension is not specified in allowlist. To {0}, " \
msg = "failed to {0} extension '{1}' because it is not specified in allowlist. To {0}, " \

"add extension to the allowed list in the policy file ('{2}').".format(policy_op,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"add extension to the allowed list in the policy file ('{2}').".format(policy_op,
"add the extension to the allowed list in the policy file ('{2}').".format(policy_op,

ext_handler.name,
conf.get_policy_file_path())
err = ExtensionPolicyError(msg, code=policy_err_code)
self.__handle_and_report_policy_error(handler_i, err, report_op=handler_i.operation, message=ustr(err),
extension=extension, report=True)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like we are missing a continue statement here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think continue statement would break the dependency logic.

It's ok to use continue in the other condition because we know all extensions will fail (dependencies don't matter)

# In case of extensions disabled, we skip processing extensions. But CRP is still waiting for some status
# back for the skipped extensions. In order to propagate the status back to CRP, we will report status back
# here with an error message.
Expand Down Expand Up @@ -527,7 +568,11 @@ def handle_ext_handlers(self, goal_state_id):
continue

# Process extensions and get if it was successfully executed or not
extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id)
# If extension was blocked by policy, treat the extension as failed and do not process the handler.
if not extension_allowed:
extension_success = False
else:
extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id)

dep_level = self.__get_dependency_level((extension, ext_handler))
if 0 <= dep_level < max_dep_level:
Expand Down Expand Up @@ -692,6 +737,32 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess
add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True,
message=message)

@staticmethod
def __handle_and_report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
maddieford marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def __handle_and_report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
def _report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):

# TODO: Consider merging this function with __handle_and_report_ext_handler_errors() above, after investigating
# the impact of this change.
#
# If extension status is present, CRP will ignore handler status and report extension status. In the case of policy errors,
# extensions are not processed, so collect_ext_status() reports transitioning status on behalf of the extension.
# However, extensions blocked by policy should fail fast, so agent should write a .status file for policy failures.
# Note that __handle_and_report_ext_handler_errors() does not create the file for single-config extensions, but changing
# it will require additional testing/investigation. As a temporary workaround, this separate function was created
# to write a status file for single-config extensions.

# Set handler status for all extensions (with and without settings)
ext_handler_i.set_handler_status(message=message, code=error.code)

# Create status file for extensions with settings (single and multi config).
if extension is not None:
ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

create_status_file_if_not_exist() will not overwrite existing status file (for the current sequence number). Is this behavior acceptable?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should overwrite the existing file with the policy error

operation=report_op, message=message)

if report:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when would report be False?

name = ext_handler_i.get_extension_full_name(extension)
handler_version = ext_handler_i.ext_handler.version
add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True,
message=message)

def handle_enable(self, ext_handler_i, extension):
"""
1- Ensure the handler is installed
Expand Down Expand Up @@ -990,7 +1061,10 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed):
# extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc)
# We also need to report extension status for an uninstalled handler if extensions are disabled because CRP
# waits for extension runtime status before failing the extension operation.
if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled():
# In the case of policy failures, we want to report extension status with a terminal code so CRP fails fast. If
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's change this

        # We also need to report extension status for an uninstalled handler if extensions are disabled because CRP
        # waits for extension runtime status before failing the extension operation.
        # In the case of policy failures, we want to report extension status with a terminal code so CRP fails fast. If
        # extension status is not present, collect_ext_status() will set a default transitioning status, and CRP will
        # wait for timeout.

to

        # We also need to report extension status for an uninstalled handler if extensions are disabled, or if the extension
        # failed due to policy, because CRP waits for extension runtime status before failing the extension operation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intention of the change is to enter this condition when the extension fails due to policy, but this change means that we enter the condition whenever policy is enabled.

Is there any negative effect to calling ext_handler_i.get_extension_handler_statuses... whenever policy is enabled? Why is this behind the if condition in the first place?

# extension status is not present, collect_ext_status() will set a default transitioning status, and CRP will
# wait for timeout.
if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled() or ExtensionPolicyEngine.get_policy_enforcement_enabled():

# Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found.
# Only try to read heartbeat if HandlerState != NotInstalled.
Expand Down
21 changes: 12 additions & 9 deletions azurelinuxagent/ga/policy/policy_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common import conf
from azurelinuxagent.common.exception import AgentError
from azurelinuxagent.common.exception import AgentError, ExtensionError
from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion

Expand All @@ -36,12 +36,6 @@
_MAX_SUPPORTED_POLICY_VERSION = "0.1.0"


class PolicyError(AgentError):
"""
Error raised during agent policy enforcement.
"""


class InvalidPolicyError(AgentError):
"""
Error raised if user-provided policy is invalid.
Expand All @@ -51,13 +45,22 @@ def __init__(self, msg, inner=None):
super(InvalidPolicyError, self).__init__(msg, inner)


class ExtensionPolicyError(ExtensionError):
"""
Error raised during agent extension policy enforcement.
"""
def __init__(self, msg, code, inner=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 'code' and 'inner' parameters are not in the same order as in the base class, which can lead to subtle coding errors.

msg = "Extension will not be processed: {0}".format(msg)
super(ExtensionPolicyError, self).__init__(msg, inner, code)


class _PolicyEngine(object):
"""
Implements base policy engine API.
"""
def __init__(self):
# Set defaults for policy
self._policy_enforcement_enabled = self.__get_policy_enforcement_enabled()
self._policy_enforcement_enabled = self.get_policy_enforcement_enabled()
if not self.policy_enforcement_enabled:
return

Expand All @@ -76,7 +79,7 @@ def _log_policy_event(msg, is_success=True, op=WALAEventOperation.Policy, send_e
add_event(op=op, message=msg, is_success=is_success, log_event=False)

@staticmethod
def __get_policy_enforcement_enabled():
def get_policy_enforcement_enabled():
"""
Policy will be enabled if (1) policy file exists at the expected location and (2) the conf flag "Debug.EnableExtensionPolicy" is true.
"""
Expand Down
Loading
Loading