Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify Process Values #5500

Merged
merged 11 commits into from
Jun 20, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def get_script_arguments():
help=f'Base name for the images. Default {None}.')
parser.add_argument('-c', '--columns', dest='columns', default=None,
help=f'Path to Json with Columns to Plot. Default {None}.')
parser.add_argument('-u', '--unify', dest='unify', action='store_true',
help=f'Unify data of the binary processes with their subprocesses to plot.')

return parser.parse_args()

Expand All @@ -32,7 +34,7 @@ def main():
makedirs(destination)
dv = DataVisualizer(dataframes=options.csv_list, target=options.visualization_target,
compare=False, store_path=options.destination, base_name=options.name,
columns_path=options.columns)
columns_path=options.columns, unify_child_daemon_metrics=options.unify)
dv.plot()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from matplotlib.ticker import LinearLocator

import json
import logging
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
Expand Down Expand Up @@ -32,7 +33,7 @@ class DataVisualizer:
base_name (str, optional): base name used to store the images.
"""
def __init__(self, dataframes, target, compare=False, store_path=gettempdir(), x_ticks_granularity='minutes',
x_ticks_interval=1, base_name=None, columns_path=None):
x_ticks_interval=1, base_name=None, columns_path=None, unify_child_daemon_metrics=False):
self.dataframes_paths = dataframes
self.dataframe = None
self.compare = compare
Expand All @@ -48,6 +49,13 @@ def __init__(self, dataframes, target, compare=False, store_path=gettempdir(), x
if target in ['binary', 'analysis', 'remote', 'agent', 'logcollector', 'wazuhdb']:
self.columns_to_plot = self._load_columns_to_plot(columns_path)

if unify_child_daemon_metrics:
if target == 'binary':
self.dataframe = self.dataframe.reset_index(drop=False)
self._unify_dataframes()
else:
logging.warning("Enabled unify is only available for binary data. Ignoring")

@staticmethod
def _color_palette(size):
"""Create a list of different colors.
Expand Down Expand Up @@ -87,6 +95,25 @@ def _load_dataframes(self):
new_csv = pd.read_csv(df_path, index_col="Timestamp", parse_dates=True)
self.dataframe = pd.concat([self.dataframe, new_csv])

def _unify_dataframes(self):
    """Unify the data of each process with their respective sub-processes.

    Every row whose 'Daemon' value contains the name of a parent daemon (a daemon name
    returned by `_get_daemons()` that does not include "child") is relabelled with that
    parent name. The remaining metric columns are then summed per ('Timestamp', 'Daemon')
    pair, and the 'PID' and 'Version' columns are attached again from the values recorded
    before the relabelling.

    Expects `self.dataframe` to expose 'Timestamp', 'Daemon', 'Version' and 'PID' as regular
    columns. The unified result replaces `self.dataframe`.
    """
    # Keep exactly one PID / Version per daemon name (the first one seen). If a daemon
    # appeared with several PIDs or versions, a lookup with repeated 'Daemon' keys would
    # turn the left merges below into one-to-many joins and duplicate every summed row.
    pids = self.dataframe[['Daemon', 'PID']].drop_duplicates(subset='Daemon')
    versions = self.dataframe[['Daemon', 'Version']].drop_duplicates(subset='Daemon')

    parent_daemons = [daemon_name for daemon_name in self._get_daemons() if "child" not in daemon_name]

    for daemon_name in parent_daemons:
        # Daemon names are literal text, not patterns: disable regex interpretation so
        # characters such as '.', '+' or '(' in a name cannot change what is matched.
        belongs_to_daemon = self.dataframe['Daemon'].str.contains(daemon_name, na=False, regex=False)
        self.dataframe.loc[belongs_to_daemon, 'Daemon'] = daemon_name

    # Everything except the identifying columns is treated as a metric to be added up.
    columns_to_drop = ['Timestamp', 'Daemon', 'Version', 'PID']
    columns_to_sum = self.dataframe.columns.drop(columns_to_drop)

    self.dataframe = self.dataframe.groupby(['Timestamp', 'Daemon'])[columns_to_sum].sum().reset_index(drop=False)

    # Re-attach the identifying columns that were left out of the aggregation.
    self.dataframe = self.dataframe.merge(pids, on='Daemon', how='left')
    self.dataframe = self.dataframe.merge(versions, on='Daemon', how='left')

def _set_x_ticks_interval(self, ax):
"""Set the number of labels that will appear in the X axis and their format.

Expand Down
Loading