Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release #2

Merged
merged 31 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e253873
Fixed goaccess directories
catttam Jul 15, 2024
dd5a5d6
Fixed Dockerfile
catttam Jul 15, 2024
1775b0c
HTTP -> HTTPS
catttam Jul 15, 2024
c08be52
Upload a backup of the ingress logs
catttam Jul 15, 2024
8a3861a
Debug prints
catttam Jul 15, 2024
4ab3183
Deleted debug logs
catttam Jul 15, 2024
a62c84d
Added dates to Prometheus metrics
catttam Jul 15, 2024
beb0aa9
Changed paths to relative path
catttam Jul 15, 2024
6a7bbbb
Fixed recursive path
catttam Jul 15, 2024
a7497af
Updated goaccess output
catttam Jul 15, 2024
00b3887
Added custom config to html output
catttam Jul 15, 2024
bc7f49f
Changed GoAccess extraction approach
catttam Jul 15, 2024
fa22045
Fixed typo on script
catttam Jul 15, 2024
4fe6a14
Fixed paths
catttam Jul 16, 2024
7359efa
Updated dashboard customization
catttam Jul 16, 2024
a1284da
Goaccess changes
catttam Sep 6, 2024
366614e
changes on goaccess metrics
catttam Sep 9, 2024
e3b875d
Changed goaccess installation
catttam Sep 9, 2024
9dd8d6b
Fixed Dockerfile
catttam Sep 9, 2024
2678860
Fixed Dockerfile
catttam Sep 9, 2024
4a7dd35
Fixed Dockerfile
catttam Sep 9, 2024
b1c4e18
Changed goaccess parser
catttam Sep 10, 2024
4ebe551
Updated goacces config
catttam Sep 10, 2024
e627956
Fixed filenames on goaccess script
catttam Sep 10, 2024
4423aa4
Changed parser methods
catttam Sep 10, 2024
1ab6a8d
Debug prints
catttam Sep 10, 2024
eccd5d5
Deleted debug path
catttam Sep 10, 2024
339c92e
Deleted debug logs
catttam Sep 10, 2024
ca13781
Fixed error when no history logs are found
catttam Sep 13, 2024
dd40942
Minor html title change
catttam Sep 13, 2024
1eba1f0
Missing dates on Prometheus metrics
catttam Sep 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM bitnami/python

# Install goaccess tool
RUN apt-get update && \
apt-get install nano goaccess -y
apt-get install vim libncursesw5-dev libmaxminddb-dev -y

# Install aws-cli
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
Expand All @@ -13,13 +13,19 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2
# Verify the installation
RUN aws --version

RUN curl "https://tar.goaccess.io/goaccess-1.5.5.tar.gz" -o "goaccess-1.5.5.tar.gz" && \
tar -xzvf goaccess-1.5.5.tar.gz && \
cd goaccess-1.5.5/ && \
./configure --enable-utf8 --enable-geoip=mmdb --with-openssl && \
make && \
make install

# Install python dependencies
COPY requirements.txt /tmp/
RUN pip3 install -r /tmp/requirements.txt

# Create the directory structure for the metrics
RUN mkdir -p /app/metrics/goaccess_outputs/filtered_by_status_code && \
mkdir -p /app/metrics/rawmetrics && \
RUN mkdir -p /app/metrics/rawmetrics/filtered_by_status_code && \
mkdir -p /app/metrics/goaccess-metrics && \
mkdir -p /app/metrics/prometheus-metrics && \
mkdir -p /app/ui && \
Expand All @@ -35,4 +41,4 @@ COPY metrics_prom.py /app/
COPY create_index.py /app/

COPY dbip-country-lite-2024.mmdb /app/
COPY goaccess.conf /etc/goaccess/
COPY goaccess.conf /usr/local/etc/goaccess/goaccess.conf
16 changes: 9 additions & 7 deletions create_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
cluster_id = os.getenv("CLUSTER_ID")
# Configuration
folder_path = '/app/metrics'
s3_path=f'https://s3.amazonaws.com/metrics.oscar.grycap.net/{cluster_id}/'
assets_base_url = 'https://s3.amazonaws.com/metrics.oscar.grycap.net/assets' # Base URL of the shared assets (hosted on S3, not a local path)

OUT_PATH="/app/ui/"
Expand Down Expand Up @@ -33,7 +32,7 @@
<div class="mt-48 page-full-wrap relative ">
<input type="hidden" value="creator-page" id="page-type">
<img class="display-image m-auto" data-src="{assets_base_url}/images/logo.png" src="{assets_base_url}/images/logo.png" alt="OSCAR metrics"/>
<h2 class="page-title page-text-color page-text-font mt-24 text-center text-fs-18">OSCAR-ai4eosc metrics index</h2>
<h2 class="page-title page-text-color page-text-font mt-24 text-center text-fs-18">{cluster_id} metrics index</h2>

<div class="mt-24">
"""
Expand Down Expand Up @@ -66,23 +65,26 @@ def get_icon(file_name):
else:
return f"{assets_base_url}/images/file.png"

def generate_html(out_file, dir_path):
def generate_html(out_file, dir_path, out_url=''):
# Generate HTML content
html_content = html_header

for i, file_name in enumerate(os.listdir(dir_path)):
file_path = os.path.join(dir_path, file_name)
if os.path.isfile(file_path) or os.path.isdir(file_path):
#file_url = file_path.replace("\\", "/")
if os.path.isdir(file_path):
relative_url=file_name+".html"
generate_html(OUT_PATH+relative_url, file_path)
file_url = s3_path+relative_url
generate_html(OUT_PATH+relative_url, file_path, out_url+file_name+"/")
file_url = relative_url
else:
file_url = s3_path+file_name
file_url = out_url+file_name
icon = get_icon(file_name)
if "dashboard" in file_name:
file_name = "GoAccess Dashboard"
if "goaccess-metrics" in file_name:
file_name = "GoAccess Metrics"
if "prometheus-metrics" in file_name:
file_name = "Prometheus Metrics"
file_entry = html_file_entry_template.format(url=file_url, id=i, icon=icon, filename=file_name)
html_content += file_entry

Expand Down
71 changes: 35 additions & 36 deletions extract_goaccess_metrics.sh
Original file line number Diff line number Diff line change
@@ -1,57 +1,45 @@
# Container paths
OUTPUTS_PATH="/app/metrics/rawmetrics"
FILTERED_PATH="$OUTPUTS_PATH/filtered_by_status_code"
OUTPUT_FILE="output"

OUTPUT_FILE="output_OSCAR_goaccess"
# Path to the readonly volume with the cluster's ingress logs
CLUSTER_LOGS_DIR="/var/log/clusterlogs"
LOCAL_LOGS_DIR="/var/log/ingresslogs"
OSCAR_LOGS_DIR="$LOCAL_LOGS_DIR/oscar"

FULL_REPORT_FILE="$OSCAR_LOGS_DIR/oscar.log"
HISTORY_LOGS="$OSCAR_LOGS_DIR/oscar.log"
LATEST_LOGS="$OSCAR_LOGS_DIR/latest_oscar.log"
mkdir -p $OSCAR_LOGS_DIR

# Log format for goaccess
LOG_FORMAT='%^ %^ %^ %h - - [%d:%t] %~ %m %U %^ %s %b %R %u %^'
LOG_FORMAT='%^ %^ %^ %h - - [%d:%t] %~ %m %U %^ %s %^ %R %^ %^'

metrics(){
oscar_logfile="oscar_execlog_"
addLog(){
ingress_logfile=$1
metric_date=$(echo $ingress_logfile | awk -F. '{print $3}')

if [ -z $metric_date ]; then
metric_date="current_"$(date +"%s")
fi

# Filter the ingress logs to get the OSCAR ones
oscar_logfile=$oscar_logfile$metric_date.log
cat $ingress_logfile | grep -a 'oscar-oscar' | grep -a '/job\|/run' > $OSCAR_LOGS_DIR/$oscar_logfile
# Ensure the filtered log file is created and not empty
if [ ! -f "$OSCAR_LOGS_DIR/$oscar_logfile" ] || [ ! -s "$OSCAR_LOGS_DIR/$oscar_logfile" ]; then
echo "[*] Warning: Failed to create log file or no OSCAR execution logs found for $OSCAR_LOGS_DIR/$oscar_logfile"
return
fi

cat $OSCAR_LOGS_DIR/$oscar_logfile | tee -a $FULL_REPORT_FILE >/dev/null
cat $ingress_logfile | grep -a 'oscar-oscar' | grep -a '/job\|/run' | tee -a $HISTORY_LOGS >/dev/null
}

geo_err=$( { cat "$OSCAR_LOGS_DIR/$oscar_logfile" | goaccess - --log-format="${LOG_FORMAT}" -o "${OUTPUTS_PATH}/${OUTPUT_FILE}_${oscar_logfile}.json" --json-pretty-print; } 2>&1 )
if [ ! -f "${OUTPUTS_PATH}/${OUTPUT_FILE}_${oscar_logfile}.json" ]; then
echo "[*] Warning: Couldn't process file $oscar_logfile"
metrics(){
LOG_FILE=$1
filename=`basename "$LOG_FILE"`
geo_err=$( { goaccess "${LOG_FILE}" --log-format="${LOG_FORMAT}" -o "${OUTPUTS_PATH}/${filename}_full.json" --json-pretty-print; } 2>&1 )
if [[ $filename == "latest"* ]]; then
python3 goaccess_metric_parser.py -f "${OUTPUTS_PATH}/${filename}_full.json" -g 0
else
python3 goaccess_metric_parser.py -f "${OUTPUTS_PATH}/${OUTPUT_FILE}_${oscar_logfile}.json" -g 0
python3 goaccess_metric_parser.py -f "${OUTPUTS_PATH}/${filename}_full.json" -g 0 -u
fi

status_codes=('200' '204' '404' '500')
init="t"

out="${FILTERED_PATH}/${OUTPUT_FILE}_${oscar_logfile}"
out="${FILTERED_PATH}/${filename}"

for code in "${status_codes[@]}"; do
code_logs=$(cat $OSCAR_LOGS_DIR/$oscar_logfile | grep -e 'HTTP/[0-9].[0-9]" '${code}' ')
code_logs=$(cat $LOG_FILE| grep -e 'HTTP/[0-9].[0-9]" '${code}' ')
if [ ! -z "$code_logs" ]; then
app_err=$( { cat $OSCAR_LOGS_DIR/$oscar_logfile | grep -e 'HTTP/[0-9].[0-9]" '${code}' ' | goaccess - -o "${out}_f${code}.json" --json-pretty-print --log-format="${LOG_FORMAT}" --enable-panel=REQUESTS --enable-panel=STATUS_CODES; } 2>&1 )
app_err=$( { cat $LOG_FILE | grep -e 'HTTP/[0-9].[0-9]" '${code}' ' | goaccess - -o "${out}_f${code}.json" --json-pretty-print --log-format="${LOG_FORMAT}"; } 2>&1 )
if [ ! -f "${out}_f${code}.json" ]; then
echo "[*] Warning: Couldn't process file $oscar_logfile for status code '$code'"
echo "[*] Warning: Couldn't process file $LOG_FILE for status code '$code'"
else
if [ $init == 't' ]; then
python3 goaccess_metric_parser.py -f "${out}_f${code}.json" -p $code
Expand All @@ -73,6 +61,8 @@ do
for logfile in "$LOCAL_LOGS_DIR/$log/controller/"*;
do
if [[ $logfile == *".gz" ]]; then
# upload a backup of the ingress logs to s3
aws s3 cp $logfile s3://metrics.oscar.grycap.net/"${CLUSTER_ID}"/ingresslogs/
# unzip all log files
gzip -d $logfile
fi
Expand All @@ -84,13 +74,22 @@ done
for logfile in "$LOCAL_LOGS_DIR/$log/controller/"*;
do
if [[ $logfile == *".log"* ]]; then
metrics $logfile
if [[ $logfile == *".log" ]]; then
aws s3 cp $logfile s3://metrics.oscar.grycap.net/"${CLUSTER_ID}"/ingresslogs/
cat $logfile | grep -a 'oscar-oscar' | grep -a '/job\|/run' | tee -a $LATEST_LOGS >/dev/null
metrics $LATEST_LOGS
else
addLog $logfile
fi
fi
done

# Generate the html file
if [ ! -f "${FULL_REPORT_FILE}" ] || [ ! -s "${FULL_REPORT_FILE}" ]; then
echo "Error: Failed to create html report."
exit 1
if [ ! -f "${HISTORY_LOGS}" ] || [ ! -s "${HISTORY_LOGS}" ]; then
goaccess "${LATEST_LOGS}" --log-format="${LOG_FORMAT}" -o "/app/metrics/dashboard.html"
else
metrics $HISTORY_LOGS

cat $LATEST_LOGS | tee -a $HISTORY_LOGS >/dev/null
goaccess "${HISTORY_LOGS}" --log-format="${LOG_FORMAT}" -o "/app/metrics/dashboard.html"
fi
goaccess "${FULL_REPORT_FILE}" --log-format="${LOG_FORMAT}" -o "/app/metrics/dashboard.html"
23 changes: 12 additions & 11 deletions goaccess.conf
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,11 @@ hl-header true
# The parser will only parse the value next to `html-prefs` (single line)
# It allows the ability to customize each panel plot. See example below.
#
#html-prefs {"theme":"bright","perPage":5,"layout":"horizontal","showTables":true,"visitors":{"plot":{"chartType":"bar"}}}
html-prefs {"theme":"dark purple","perPage":5,"layout":"horizontal","showTables":true,"requests":{"title":"Inference requests","plot":{"chartType":"bar"}},"hosts":{"title":"Reached countries","plot":{"chartType":"null"}}}

# Set HTML report page title and header.
#
#html-report-title My Awesome Web Stats
#html-report-title OSCAR Metrics

# Format JSON output using tabs and newlines.
#
Expand Down Expand Up @@ -449,7 +449,7 @@ double-decode false
# Enable parsing/displaying the given panel.
#
#enable-panel VISITORS
#enable-panel REQUESTS
enable-panel REQUESTS
#enable-panel REQUESTS_STATIC
#enable-panel NOT_FOUND
#enable-panel HOSTS
Expand Down Expand Up @@ -499,19 +499,20 @@ crawlers-only false

# Ignore parsing and displaying the given panel.
#
#ignore-panel VISITORS
ignore-panel VISITORS
#ignore-panel REQUESTS
#ignore-panel REQUESTS_STATIC
#ignore-panel NOT_FOUND
ignore-panel REQUESTS_STATIC
ignore-panel NOT_FOUND
#ignore-panel HOSTS
#ignore-panel OS
#ignore-panel BROWSERS
#ignore-panel VISIT_TIMES
ignore-panel OS
ignore-panel ASN
ignore-panel BROWSERS
ignore-panel VISIT_TIMES
#ignore-panel VIRTUAL_HOSTS
ignore-panel REFERRERS
#ignore-panel REFERRING_SITES
ignore-panel REFERRING_SITES
ignore-panel KEYPHRASES
#ignore-panel STATUS_CODES
ignore-panel STATUS_CODES
#ignore-panel REMOTE_USER
#ignore-panel CACHE_STATUS
#ignore-panel GEO_LOCATION
Expand Down
40 changes: 19 additions & 21 deletions goaccess_metric_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,20 @@

with open(args.file_path, 'r') as rawfile:
metrics = json.loads(rawfile.read())
START_DATE = metrics["general"]["start_date"]
END_DATE = metrics["general"]["end_date"]
try:
START_DATE = metrics["general"]["start_date"]
END_DATE = metrics["general"]["end_date"]
except:
START_DATE = metrics["general"]["date_time"]
END_DATE = metrics["general"]["date_time"]


"""
> Countries reached
> Output format: {continent, country, total_visits, unique_visits, start_metric_date, end_metric_date}
"""
def parse_geolocation_info():

with open(f'{OUTPUT_PATH}/{TIMESTAMP}_geolocation_metrics.csv', 'w', newline='') as gfile:
def parse_geolocation_info(write_type):
with open(f'{OUTPUT_PATH}/geolocation_metrics.csv', write_type, newline='') as gfile:
writer = csv.writer(gfile)
fields = ["continent", "country", "total_visits", "unique_visits", "start_metric_date", "end_metric_date"]
writer.writerow(fields)
Expand Down Expand Up @@ -80,22 +84,22 @@ def parse_requests_info(status_code, write_type):
exec_count+=sum_requests

if create_count != 0:
with open(f'{OUTPUT_PATH}/{TIMESTAMP}_created_apps_metrics.csv', write_type, newline='') as cfile:
with open(f'{OUTPUT_PATH}/created_apps_metrics.csv', write_type, newline='') as cfile:
writer = csv.writer(cfile)
if write_type == "w": writer.writerow(["application_count", "status_code", "start_metric_date", "end_metric_date"])
writer.writerow([create_count, status_code, START_DATE, END_DATE])

cfile.close()

if exec_count != 0:
with open(f'{OUTPUT_PATH}/{TIMESTAMP}_total_inference_metrics.csv', write_type, newline='') as efile:
with open(f'{OUTPUT_PATH}/total_inference_metrics.csv', write_type, newline='') as efile:
writer = csv.writer(efile)
if write_type == "w": writer.writerow(["inference_count", "status_code", "start_metric_date", "end_metric_date"])
writer.writerow([exec_count, status_code, START_DATE, END_DATE])

efile.close()

with open(f'{OUTPUT_PATH}/{TIMESTAMP}_services_inference_metrics.csv', write_type, newline='') as sfile:
with open(f'{OUTPUT_PATH}/services_inference_metrics.csv', write_type, newline='') as sfile:
writer = csv.writer(sfile)
if write_type == "w": writer.writerow(["service_name", "exec_type", "status_code", "inference_count" , "start_metric_date", "end_metric_date"])
for k in inference.keys():
Expand All @@ -105,17 +109,11 @@ def parse_requests_info(status_code, write_type):
sfile.close()


if args.general:
parse_geolocation_info()
elif args.partial:
if args.use_existing:
parse_requests_info(args.status_code, "a")
else:
parse_requests_info(args.status_code, "w")






wr="w"
if args.use_existing:
wr="a"

if args.general:
parse_geolocation_info(wr)
if args.partial:
parse_requests_info(args.status_code, wr)
12 changes: 7 additions & 5 deletions metrics_prom.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from datetime import date
import time
import requests
import json
import csv
import argparse
from datetime import datetime, timedelta
from oscar_python.client import Client

QUERY_ENDPOINT = "/api/v1/query?query="
TIME = "5d"
END_DATE = datetime.today()
START_DATE = END_DATE - timedelta(days=5)

parser = argparse.ArgumentParser(description="Command-line to retreive Prometheus metrics from OSCAR", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

Expand Down Expand Up @@ -55,12 +57,12 @@ def query(cpu_usage_query):
return json.loads(response.text)

def generate_file_name():
return f"/app/metrics/prometheus-metrics/metric-{str(int(time.time()))}.csv"
return f"/app/metrics/prometheus-metrics/metric-{END_DATE}.csv"

def extract_metrics(cluster_services):
with open(generate_file_name(), 'w', newline='') as file:
writer = csv.writer(file)
fields = ["service_name", "pod_name", "cpu_usage_seconds", "vo"]
fields = ["service_name", "pod_name", "cpu_usage_seconds", "vo", "start_date", "end_date"]
writer.writerow(fields)

for svc in cluster_services:
Expand All @@ -80,7 +82,7 @@ def extract_metrics(cluster_services):
pod_name = m["metric"]["pod"]
if k in pod_name:
value = m["value"][1]
writer.writerow([svc_name,pod_name, value, svc_vo])
writer.writerow([svc_name,pod_name, value, svc_vo, START_DATE, END_DATE])
else:
if "expose" in svc_name:
cpu_usage_query = get_exposed_query(svc_name)
Expand All @@ -92,7 +94,7 @@ def extract_metrics(cluster_services):
for m in metrics:
#pod_name = m["metric"]["pod"]
value = m["value"][1]
writer.writerow([svc_name,svc_name, value, svc_vo])
writer.writerow([svc_name,svc_name, value, svc_vo, START_DATE, END_DATE])

######## MAIN ##########
if __name__ == "__main__":
Expand Down
Loading