Skip to content

Commit

Permalink
process-stats: Add support for graphs with aggregate resource usage.
Browse files Browse the repository at this point in the history
We take all the stats and generate sum/mean/max/min graphs per process
type as well as an extra graph for all OVN precesses together and
another one for all OVS precesses together.

Resulted HTML also contains the peak of the sum of RSS and the peak of
the sum of CPU usage for all OVN precesses together.

Signed-off-by: Ilya Maximets <[email protected]>
  • Loading branch information
igsilya committed Mar 27, 2024
1 parent 6c979d0 commit cce2209
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 9 deletions.
6 changes: 6 additions & 0 deletions do.sh
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,12 @@ function mine_data() {
| grep ovn-scale | head -3)
python3 ${topdir}/utils/process-stats.py \
resource-usage-report-worker.html ${resource_usage_logs}

# Preparing reports for aggregate resource usage.
resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json)
python3 ${topdir}/utils/process-stats.py --aggregate \
resource-usage-report-aggregated.html ${resource_usage_logs}

deactivate

popd
Expand Down
91 changes: 82 additions & 9 deletions utils/process-stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,41 @@ def read_file(filename: str) -> Dict:
return json.load(file)


def resource_stats_generate(filename: str, data: Dict) -> None:
def aggregated(df: pd.DataFrame) -> (pd.DataFrame, int):
column_names = list(df.columns)

df = df.pivot(
index='Time', columns='Process', values=column_names[2]
).interpolate(method='time', limit_direction='both')

result = pd.DataFrame(index=df.index)
processes = set([p.split('|')[0] for p in list(df.columns)])

for p in processes:
df_filtered = df.filter(regex='^' + p)
result[p + '|sum'] = df_filtered.sum(axis=1)
result[p + '|mean'] = df_filtered.mean(axis=1)
result[p + '|max'] = df_filtered.max(axis=1)
result[p + '|min'] = df_filtered.min(axis=1)

result['ovn|sum'] = df.filter(regex='^ovn.*\\|ovn-(central|scale).*').sum(
axis=1
)
ovn_max = result['ovn|sum'].astype('int').max()

result['ovs|sum'] = df.filter(regex='^ovs.*\\|ovn-(central|scale).*').sum(
axis=1
)

result = result.astype('int').reset_index().melt(id_vars=['Time'])
result.columns = column_names

return result, ovn_max


def resource_stats_generate(
filename: str, data: Dict, aggregate: bool
) -> None:
rss: List[List] = []
cpu: List[List] = []

Expand All @@ -27,19 +61,48 @@ def resource_stats_generate(filename: str, data: Dict) -> None:
df_rss = pd.DataFrame(rss, columns=['Time', 'Process', 'RSS (MB)'])
df_cpu = pd.DataFrame(cpu, columns=['Time', 'Process', 'CPU (%)'])

if aggregate:
df_rss, max_sum_rss = aggregated(df_rss)
df_cpu, max_sum_cpu = aggregated(df_cpu)

rss_chart = px.line(
df_rss,
x='Time',
y='RSS (MB)',
color='Process',
title='Resident Set Size',
title=('Aggregate ' if aggregate else '') + 'Resident Set Size',
)
cpu_chart = px.line(
df_cpu, x='Time', y='CPU (%)', color='Process', title='CPU usage'
df_cpu,
x='Time',
y='CPU (%)',
color='Process',
title=('Aggregate ' if aggregate else '') + 'CPU usage',
)

with open(filename, 'w') as report_file:
report_file.write('<html>')
if aggregate:
report_file.write(
'''
<table border="1" class="dataframe">
<tbody>
<tr>
<td>Max(Sum(OVN RSS))</td>
<td>'''
+ str(max_sum_rss)
+ ''' MB</td>
</tr>
<tr>
<td>Max(Sum(OVN CPU))</td>
<td>'''
+ str(max_sum_cpu)
+ ''' %</td>
</tr>
</tbody>
</table>
'''
)
report_file.write(
rss_chart.to_html(
full_html=False,
Expand All @@ -60,17 +123,27 @@ def resource_stats_generate(filename: str, data: Dict) -> None:


if __name__ == '__main__':
if len(sys.argv) < 3:
print(f'Usage: {sys.argv[0]} output-file input-file [input-file ...]')
if len(sys.argv) < 3 or (
sys.argv[1] == '--aggregate' and len(sys.argv) < 4
):
print(
f'Usage: {sys.argv[0]} '
+ '[--aggregate] output-file input-file [input-file ...]'
)
sys.exit(1)

if os.path.isfile(sys.argv[1]):
print(f'Output file {sys.argv[1]} already exists')
aggregate = sys.argv[1] == '--aggregate'
start = 2 if aggregate else 1

if os.path.isfile(sys.argv[start]):
print(f'Output file {sys.argv[start]} already exists')
sys.exit(2)

print(f'Processing stats from {len(sys.argv) - start - 1} files.')

data: Dict = {}
for f in sys.argv[2:]:
for f in sys.argv[start + 1 :]:
d = read_file(f)
data.update(d)

resource_stats_generate(sys.argv[1], data)
resource_stats_generate(sys.argv[start], data, aggregate)

0 comments on commit cce2209

Please sign in to comment.