diff --git a/CHANGELOG.md b/CHANGELOG.md index b55b1e1d68..a13add7cc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [4.8.1] - TBD +### Added + +- Add functionality to unify data of the binary processes with their subprocesses to plot ([#5500](https://github.com/wazuh/wazuh-qa/pull/5500)) \- (Framework) + ### Changed - Fix test_consistency_initial_scans by adding a 30-minute wait before collecting vulnerabilities. ([#5507](https://github.com/wazuh/wazuh-qa/pull/5507)) \- (Tests) diff --git a/deps/wazuh_testing/wazuh_testing/scripts/data_visualizations.py b/deps/wazuh_testing/wazuh_testing/scripts/data_visualizations.py index f7c665dc4f..8ba897af7b 100644 --- a/deps/wazuh_testing/wazuh_testing/scripts/data_visualizations.py +++ b/deps/wazuh_testing/wazuh_testing/scripts/data_visualizations.py @@ -20,6 +20,8 @@ def get_script_arguments(): help=f'Base name for the images. Default {None}.') parser.add_argument('-c', '--columns', dest='columns', default=None, help=f'Path to Json with Columns to Plot. Default {None}.') + parser.add_argument('-u', '--unify', dest='unify', action='store_true', + help=f'Unify data of the binary processes with their subprocesses to plot.') return parser.parse_args() @@ -32,7 +34,7 @@ def main(): makedirs(destination) dv = DataVisualizer(dataframes=options.csv_list, target=options.visualization_target, compare=False, store_path=options.destination, base_name=options.name, - columns_path=options.columns) + columns_path=options.columns, unify_child_daemon_metrics=options.unify) dv.plot() diff --git a/deps/wazuh_testing/wazuh_testing/tools/performance/visualization.py b/deps/wazuh_testing/wazuh_testing/tools/performance/visualization.py index 807d3ed5b5..31ead4be57 100644 --- a/deps/wazuh_testing/wazuh_testing/tools/performance/visualization.py +++ b/deps/wazuh_testing/wazuh_testing/tools/performance/visualization.py @@ -4,6 +4,7 @@ from matplotlib.ticker import LinearLocator import json +import logging import matplotlib.dates as mdates import matplotlib.pyplot as plt import pandas as pd @@ -32,7 +33,7 @@ class DataVisualizer: base_name (str, optional): base name used to store the images. """ def __init__(self, dataframes, target, compare=False, store_path=gettempdir(), x_ticks_granularity='minutes', - x_ticks_interval=1, base_name=None, columns_path=None): + x_ticks_interval=1, base_name=None, columns_path=None, unify_child_daemon_metrics=False): self.dataframes_paths = dataframes self.dataframe = None self.compare = compare @@ -48,6 +49,13 @@ def __init__(self, dataframes, target, compare=False, store_path=gettempdir(), x if target in ['binary', 'analysis', 'remote', 'agent', 'logcollector', 'wazuhdb']: self.columns_to_plot = self._load_columns_to_plot(columns_path) + if unify_child_daemon_metrics: + if target == 'binary': + self.dataframe = self.dataframe.reset_index(drop=False) + self._unify_dataframes() + else: + logging.warning("Enabled unify is only available for binary data. Ignoring") + @staticmethod def _color_palette(size): """Create a list of different colors. @@ -87,6 +95,25 @@ def _load_dataframes(self): new_csv = pd.read_csv(df_path, index_col="Timestamp", parse_dates=True) self.dataframe = pd.concat([self.dataframe, new_csv]) + def _unify_dataframes(self): + """Unify the data of each process with their respective sub-processes. + """ + pids = self.dataframe[['Daemon', 'PID']].drop_duplicates() + versions = self.dataframe[['Daemon', 'Version']].drop_duplicates() + + daemons_list = [daemon_name for daemon_name in self._get_daemons() if "child" not in daemon_name] + + for daemon_name in daemons_list: + self.dataframe.loc[self.dataframe['Daemon'].str.contains(daemon_name, na=False), 'Daemon'] = daemon_name + + columns_to_drop = ['Timestamp', 'Daemon', 'Version', 'PID'] + columns_to_sum = self.dataframe.columns.drop(columns_to_drop) + + self.dataframe = self.dataframe.groupby(['Timestamp', 'Daemon'])[columns_to_sum].sum().reset_index(drop=False) + + self.dataframe = self.dataframe.merge(pids[['Daemon', 'PID']], on='Daemon', how='left') + self.dataframe = self.dataframe.merge(versions[['Daemon', 'Version']], on='Daemon', how='left') + def _set_x_ticks_interval(self, ax): """Set the number of labels that will appear in the X axis and their format.