Changes to collect metrics #18
base: main
@@ -157,4 +157,4 @@ objects:
maxUnavailable: 1
type: RollingUpdate
parameters:
- name: image_name
- name: image_name
@@ -39,7 +39,7 @@
route.app = '!(starts_with!(.kubernetes.pod_namespace,"kube") && starts_with!(.kubernetes.pod_namespace,"openshift") && .kubernetes.pod_namespace == "default")'

[sinks.stress]
type="file"
type = "file"
inputs = ["ocp_sys.infra","ocp_sys.app"]
encoding.codec = "ndjson"
path = "/var/log/containers/stress.log"

Review comment: ok
@@ -48,8 +48,8 @@ objects:
chmod 744 kubectl-top.sh ;
echo "Collecting Statistics";
OUTPUT_FILE=/var/log/containers/stress.log;
sleep 60;
echo -e "\nOutput log file is: $OUTPUT_FILE\n";
touch $OUTPUT_FILE;

Review comment: Did you check with all collectors that there is no need to …

./check-logs-sequence -rf ${output_format} -f $OUTPUT_FILE -c ${number_of_log_lines_between_reports} -l ${check_log_level} &
echo "=== setup-end ===";
while true; do
@@ -30,7 +30,7 @@ select_stress_profile() {
heavy_containers_msg_per_sec=1000
low_containers_msg_per_sec=10
number_of_log_lines_between_reports=10;
maximum_logfile_size=10485760;
maximum_logfile_size=52428800; # default file size
Review comment: @ajaygupta978 it looks like what you are trying to do is "not set" the log file rotation for your tests ... let me suggest a better approach. I suggest revoking this change to maximum_logfile_size and instead adding a flag somewhere here https://github.com/ViaQ/cluster-logging-collector-benchmarks/blob/main/deploy_to_openshift.sh#L7 that allows controlling whether the log file rotation is set at all.

Review comment: @eranra Yes, you are right, thanks for the suggestion. I think I now have a better suggestion: instead of a flag we can add …

Review comment: @ajaygupta978 agree, this can force the value ... I am just suggesting that, for example, if someone puts a zero in that value we will not even call the function to set rate limits and use whatever comes out of the box from OCP.
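To make the suggestion above concrete, here is a minimal bash sketch of the zero-means-skip idea; the function name set_log_rotation is a placeholder for whatever function in deploy_to_openshift.sh actually applies the rotation settings, not a confirmed name from this repository:

    # Hypothetical sketch, not part of this PR: treat maximum_logfile_size=0 as
    # "leave the out-of-the-box OCP log rotation untouched".
    maximum_logfile_size=${maximum_logfile_size:-52428800}   # assumed 50 MiB default

    if [ "$maximum_logfile_size" -gt 0 ]; then
        # Placeholder for whatever function actually applies the rotation /
        # rate-limit settings on the cluster nodes.
        set_log_rotation "$maximum_logfile_size"
    else
        echo "maximum_logfile_size=0: keeping the default OCP log rotation"
    fi

With a guard like this, the stress profiles can keep a non-zero default while a test that wants unmodified OCP behavior simply passes zero.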
case $stress_profile in
"no-stress")
@@ -39,71 +39,55 @@ select_stress_profile() {
number_low_stress_containers=0;
low_containers_msg_per_sec=0;
number_of_log_lines_between_reports=10;
maximum_logfile_size=10485760;
maximum_logfile_size=52428800;
;;
"very-light")
number_heavy_stress_containers=0;
heavy_containers_msg_per_sec=0;
number_low_stress_containers=1;
low_containers_msg_per_sec=10;
number_of_log_lines_between_reports=100;
maximum_logfile_size=10485760;
maximum_logfile_size=52428800;
;;
"light")
number_heavy_stress_containers=1;
heavy_containers_msg_per_sec=100;
number_low_stress_containers=2;
low_containers_msg_per_sec=10;
number_of_log_lines_between_reports=1000;
maximum_logfile_size=1048576;
;;
"experiment")
number_heavy_stress_containers=0;
heavy_containers_msg_per_sec=0;
number_low_stress_containers=20;
low_containers_msg_per_sec=20000;
number_of_log_lines_between_reports=100;
maximum_logfile_size=10485760;
maximum_logfile_size=52428800;
;;
"medium")
number_heavy_stress_containers=2;
heavy_containers_msg_per_sec=1000;
number_low_stress_containers=10;
low_containers_msg_per_sec=10;
number_of_log_lines_between_reports=20000;
maximum_logfile_size=1048576;
maximum_logfile_size=52428800;
;;
"heavy")
number_heavy_stress_containers=0;
heavy_containers_msg_per_sec=0;
number_low_stress_containers=10;
low_containers_msg_per_sec=1500;
number_of_log_lines_between_reports=200000;
maximum_logfile_size=1048576;
;;
"very-heavy")
number_heavy_stress_containers=0;
heavy_containers_msg_per_sec=0;
number_low_stress_containers=10;
low_containers_msg_per_sec=3000;
number_of_log_lines_between_reports=300000;
maximum_logfile_size=1048576;
maximum_logfile_size=52428800;
;;
"heavy-loss")
number_heavy_stress_containers=2;
heavy_containers_msg_per_sec=20000;
number_low_stress_containers=8;
low_containers_msg_per_sec=1500;
number_of_log_lines_between_reports=200000;
maximum_logfile_size=1048576;
maximum_logfile_size=52428800;
;;
"very-heavy")
number_heavy_stress_containers=10;
heavy_containers_msg_per_sec=20000;
number_low_stress_containers=10;
low_containers_msg_per_sec=1500;
number_of_log_lines_between_reports=1000000;
maximum_logfile_size=1048576;
maximum_logfile_size=52428800;
;;
*) show_usage
;;
@@ -156,8 +140,8 @@ deploy() {
delete_logstress_project_if_exists
create_logstress_project
set_credentials
deploy_logstress $number_heavy_stress_containers $heavy_containers_msg_per_sec $number_low_stress_containers $low_containers_msg_per_sec $use_log_samples
if $gowatcher ; then deploy_gologfilewatcher "$gologfilewatcher_image"; fi

case "$collector" in
'vector') deploy_log_collector_vector "$vector_image" "$vector_conf";;
'fluentd') deploy_log_collector_fluentd "$fluentd_image" "$fluentd_conf_file" "$fluentd_pre";;

@@ -168,6 +152,8 @@ deploy() {
deploy_log_collector_fluentbit "$fluentbit_image" conf/collector/fluentbit/dual/fluentbit.conf;;
*) show_usage ;;
esac
deploy_logstress $number_heavy_stress_containers $heavy_containers_msg_per_sec $number_low_stress_containers $low_containers_msg_per_sec $use_log_samples
if $gowatcher ; then deploy_gologfilewatcher "$gologfilewatcher_image"; fi
if $gowatcher ; then expose_metrics_to_prometheus; fi
deploy_capture_statistics $number_of_log_lines_between_reports "$output_format" "$report_interval"
if $evacuate_node ; then evacuate_node_for_performance_tests; fi
@@ -1,22 +1,40 @@
# This script collects metrics like log-per-sec, cpu-percentage, cpu-cores and memory required per iterations in running reg-ex rules
# in discovering log-levels.

# collector values could be fluentd, fluentbit
export collector=$1

Review comment: looks good to me. Maybe @jcantrill will want to take a quick second look at the changes to this file.
export capture_pod=`oc get pods | grep capture | cut -d" " -f1`
export fluentd_pod=`oc get pods | grep fluentd | cut -d" " -f1`
export iterations=`oc logs $capture_pod | grep " | ruby" | wc -l`

if [ "$iterations" -lt 10 ]; then echo "Total Iterations till now: $iterations Results will be printed after 10 iterations"; exit 1; fi

echo "Total No of iterations: $iterations"
export collector_pod=`oc get pods | grep $collector | cut -d" " -f1`
export iterations=`oc logs $capture_pod | grep "Top information on:" | wc -l`

export LPS=`oc logs $capture_pod | grep -i "Total collected logs per sec:" | cut -d ":" -f2 | awk '{ SUM += $1} END { print SUM/NR }'`
echo "Avg logs per sec/iter: $LPS"
echo "$collector $collector_pod"
# if [ "$iterations" -lt 10 ]; then echo "Total Iterations till now: $iterations Results will be printed after 10 iterations"; exit 1; fi
echo "Total Iterations till now: $iterations"

export Cpu_Percentage=`oc logs $capture_pod | grep -i "| ruby" | cut -d "|" -f1 | awk '{ SUM += $1} END { print SUM/NR }'`
echo "Avg cpu percentage/iter: $Cpu_Percentage"
while : ; do
iterations=`oc logs $capture_pod | grep "Top information on:" | wc -l`
export total_time=`oc logs $capture_pod | grep "Time from start monitoring (in secs)" | cut -d ":" -f2 | tr -d ' ' | tail -1`
echo "Total Iterations till now: $iterations"
echo "Total time till now: $total_time"
export current_LPS=`oc logs $capture_pod | grep -i "Total collected logs per sec:" | cut -d ":" -f2 | tr -d ' ' | tail -1`
echo "Current LPS=$current_LPS"
export LPS=`oc logs $capture_pod | grep -i "Total collected logs per sec:" | cut -d ":" -f2 | awk '{ SUM += $1} END { print SUM/NR }'`
echo "Avg logs per sec/iter: $LPS"

export Cpu_Core=`oc logs $capture_pod | grep $fluentd_pod | awk '{print $2}' | cut -d 'm' -f1 | awk '{ SUM+= $1} END { print SUM/NR }'`
echo "Avg cpu core/iter: $Cpu_Core"
export current_cpu_core=`oc logs $capture_pod | grep $collector_pod | awk '{print $2}' | cut -d 'm' -f1 | tail -1`
echo "Current CPU core=$current_cpu_core"
export Cpu_Core=`oc logs $capture_pod | grep $collector_pod | awk '{print $2}' | cut -d 'm' -f1 | awk '{ SUM+= $1} END { print SUM/NR }'`
echo "Avg cpu core/iter: $Cpu_Core"
export current_memory=`oc logs $capture_pod | grep $collector_pod | awk '{print $3}' | cut -d 'M' -f1 | tail -1`
echo "Current Memory=$current_memory"
export Memory=`oc logs $capture_pod | grep $collector_pod | awk '{print $3}' | cut -d 'M' -f1 | awk '{ SUM+= $1} END { print SUM/NR }'`
echo "Avg memory/iter: $Memory"
export end_time=$(date +%s%N | cut -b1-13)
echo "End time: $end_time"
if [ "$total_time" -ge 1800 ]; then break; fi
sleep 10
done

export Memory=`oc logs $capture_pod | grep $fluentd_pod | awk '{print $3}' | cut -d 'M' -f1 | awk '{ SUM+= $1} END { print SUM/NR }'`
echo "Avg memory/iter: $Memory"
export end_time=$(date +%s%N | cut -b1-13)
echo "Start time: $start_time"
echo "End time: $end_time"
echo "Exiting after $iterations"
Review comment: ok
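For reference, here is a hedged usage sketch of the metrics-collection script shown in the last diff; the file name collect_metrics.sh and the project name logstress are assumptions, while the collector argument (fluentd or fluentbit), the 10-second polling interval, and the 1800-second stop condition come from the script body itself:

    # Assumed file name; the script takes the collector name as $1 and greps the
    # matching pod out of `oc get pods`, so switch to the benchmark project first.
    oc project logstress           # assumed project name, adjust to your deployment
    ./collect_metrics.sh fluentd   # prints current/average LPS, CPU cores and memory every 10s until ~1800s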