# workflowToJSON.sh
#
# Provenance:
#   File:    UTILS/Parsers/workflowToJSON.sh
#   Commit:  6520bbf4d09875d99f41269f48da80c9f8225245
#   Author:  miranov25
#   Date:    Sun, 15 Dec 2024 11:26:18 +0100
#   Subject: ATO-648 - porting the code from the TPC gitlab to the O2DPG tools
#
# Source path for the script
#   source $NOTES/JIRA/ATO-648/workflowToJSON.sh
#
# Description:
#   This script converts workflow configuration logs into a structured JSON
#   format for enhanced data analysis and readability.
#
# Usage:
#   Example: source $O2DPG/UTILS/Parsers/workflowToJSON.sh
#
# This file is meant to be sourced, so it deliberately does not enable
# `set -e` / `set -u`: that would change the options of the calling shell.

# Interactive convenience alias: syntax-highlight bash text when pygmentize is
# installed, otherwise behave like plain cat.
alias helpCat=cat
if command -v pygmentize >/dev/null 2>&1; then
  alias helpCat="pygmentize -O style=borland,linenos=1 -l bash"
fi

#######################################
# Print stdin, syntax-highlighted with pygmentize when it is available.
# Arguments:
#   $1   - pygmentize lexer name (defaults to "bash" when omitted)
#   $2.. - optional extra files appended (not highlighted) after stdin
# Outputs:
#   The (possibly highlighted) text on stdout.
# Returns:
#   Exit status of the final cat.
#######################################
helpCat0() {
  local language="${1:-bash}"
  if (( $# > 0 )); then
    shift
  fi
  # if/else rather than two independent `[[ ]] && cmd` lines: with the latter
  # the function returned non-zero whenever pygmentize was installed.
  if command -v pygmentize >/dev/null 2>&1; then
    pygmentize -O style=monokai,linenos=1 -l "$language" | cat - "$@"
  else
    cat - "$@"
  fi
}

#######################################
# Print the overview help for this script.
# Outputs:
#   Help text on stdout.
#######################################
init648() {
  # NOTE(review): the opening lines of this help text (everything before
  # "--configKeyValues") were lost from the source and are reconstructed here;
  # confirm the wording against the upstream file.
  cat <<'HELP_USAGE'
workflowToJSON.sh: convert workflow configuration logs into structured JSON.

Functions:
  makeParse <log_file>   Parse a workflow log and print a JSON array on stdout.
  makeDiffExample        Show examples of comparing two parsed workflows.

Log format:
Each workflow command is expected on its own line, in the form:
  <command> [--switch [value]]... --configKeyValues "<key>=<value>;..."

Example Command:
A typical command in the log might appear like this:
o2-ctf-reader-workflow --session default_1304519_3825 --severity info --shm-segment-id 0 --shm-segment-size 64000000000 --resources-monitoring 50 --resources-monitoring-dump-interval 50 --early-forward-policy noraw --fairmq-rate-logging 0 --timeframes-rate-limit 2 --timeframes-rate-limit-ipcid 0 --ans-version compat --delay 1 --loop 0 --max-tf 2 --ctf-input list.list --onlyDet ITS,TPC,TOF,FV0,FT0,FDD,TRD,CTP --pipeline tpc-entropy-decoder:1 --allow-missing-detectors --its-digits --mft-digits --configKeyValues "keyval.input_dir=/tmp/tmp.rgwfzmuG63;keyval.output_dir=/dev/null;;"

Transformation:
This script processes each command line from the log, turning them into JSON objects. This structural change not only organizes the data but also enhances accessibility for programmatic queries and analysis.
HELP_USAGE
}

#######################################
# Parse a workflow log and print a JSON array with one object per command.
#
# Only lines starting with "o2-" are considered. For each such line the text
# up to the first "|" is tokenized on spaces and turned into:
#   command         - the first token
#   switches        - object of "--name" tokens; a switch followed by a
#                     non-switch token takes that token as its value (the last
#                     one wins), "--name=value" is split at the first "=",
#                     and a bare switch maps to true
#   configKeyValues - object built from the "key=value;..." argument of
#                     --configKeyValues ({} when absent or empty)
#
# Arguments:
#   $1 - path to the workflow log; when empty or missing the usage is printed
# Outputs:
#   JSON on stdout; diagnostics on stderr.
# Returns:
#   0 on success or when only the usage was printed, 1 when the log file is
#   not readable or jq is not installed, otherwise the exit status of jq.
#######################################
makeParse() {
  if [[ -z "${1:-}" ]]; then
    # Send the help text through helpCat0, which applies syntax highlighting
    # when pygmentize is available.
    cat <<'HELP_USAGE' | helpCat0 bash
makeParse: Parse the workflow log and write the JSON to stdout (redirect it to create e.g. output.json).
Usage:
  makeParse <log_file>

Example usage:
  #makeParse workflowconfig.log > ~/output.json # To parse a specific log file.
  makeParse /lustre/alice/tpcdata/Run3/SCDprodTests/fullRec/PbPb_Streamers_Tune_ClusterErrors-merge-streamer/avgCharge_fullTPC_sampling_TimeBins16-Average0_rejectEdgeCl-Seed0-Track0-margin0/LHC23zzh.b5p/544116.38kHz/0110/workflowconfig.log > workflow.json
  cat workflow.json | jq '.[] | select(.command | test("^o2-dpl"))' # Filter DPL workflows.
  jq '.[] | select(.command | test("^o2-gpu"))' workflow.json # Filter GPU related commands.

HELP_USAGE
    return 0 # Nothing to parse when no log file was given.
  fi

  local log_file="$1"
  if [[ ! -r "$log_file" ]]; then
    printf 'makeParse: cannot read log file: %s\n' "$log_file" >&2
    return 1
  fi
  if ! command -v jq >/dev/null 2>&1; then
    printf 'makeParse: jq is required but was not found in PATH\n' >&2
    return 1
  fi

  # The jq program is single-quoted for the shell, so it must not contain
  # any apostrophes (including inside jq comments).
  jq -Rn '
    # Fold argument tokens into a switches object. The most recent switch is
    # tracked explicitly so that a leading non-switch token (for example -b)
    # is ignored instead of failing on a null key.
    def parse_switches:
      reduce .[] as $tok ({opts: {}, last: null};
        if ($tok | startswith("--")) then
          ($tok | ltrimstr("--") | split("=")) as $kv
          | ($kv[0] // "") as $key
          | .last = $key
          | .opts[$key] = (if ($kv | length) > 1
                           then ($kv[1:] | join("="))
                           else true
                           end)
        elif .last != null then
          .last as $key
          | .opts[$key] = $tok
        else
          .
        end)
      | .opts;

    # Extract the --configKeyValues argument (double-quoted, or a bare word)
    # and turn its key=value;key=value list into an object.
    def config_key_values:
      if contains("--configKeyValues") then
        (split("--configKeyValues")[1] | sub("^[ =]+"; "")) as $rest
        | (if ($rest | startswith("\""))
           then ($rest[1:] | split("\"")[0])
           else ($rest | split(" ")[0])
           end // "")
        | split(";")
        | map(select(contains("="))
              | split("=")
              | {(.[0]): (.[1:] | join("="))})
        | add // {}
      else
        {}
      end;

    [ inputs
      | select(startswith("o2-"))
      # Keep only the command itself: drop the pipe to the next workflow and
      # anything after it (such as a trailing line-continuation backslash).
      | (split("|")[0]) as $cmdline
      | ($cmdline | split(" ") | map(select(length > 0 and . != "\\"))) as $tokens
      | {
          command: ($tokens[0]),
          switches: ($tokens[1:] | parse_switches),
          configKeyValues: ($cmdline | config_key_values)
        }
    ]' "$log_file"
}

#######################################
# Print worked examples for comparing two workflow logs parsed by makeParse.
# Outputs:
#   Help text on stdout.
#######################################
makeDiffExample() {
  # NOTE(review): the title and the first makeParse command of step 1 were
  # lost from the source; the first input path is assumed to be the non-"-ref"
  # log used in the makeParse help. Confirm against the upstream file.
  cat <<'HELP_USAGE'
Example: comparing two workflow configurations

1. Parse the two workflow logs into JSON files:
   `makeParse /lustre/alice/tpcdata/Run3/SCDprodTests/fullRec/PbPb_Streamers_Tune_ClusterErrors-merge-streamer/avgCharge_fullTPC_sampling_TimeBins16-Average0_rejectEdgeCl-Seed0-Track0-margin0/LHC23zzh.b5p/544116.38kHz/0110/workflowconfig.log > workflow0.json`

   `makeParse /lustre/alice/tpcdata/Run3/SCDprodTests/fullRec/PbPb_Streamers_Tune_ClusterErrors-merge-streamer/avgCharge_fullTPC_sampling_TimeBins16-Average0_rejectEdgeCl-Seed0-Track0-margin0-ref/LHC23zzh.b5p/544116.38kHz/0110/workflowconfig.log > workflow1.json`

2. Compare two JSON files using `jq` and `diff` directly without temporary files:
   `diff <(jq --sort-keys . workflow1.json) <(jq --sort-keys . workflow0.json)`

3. Use `diff` with side-by-side view and color using ANSI color codes:
   `diff --side-by-side --left-column --color=always <(jq --sort-keys . workflow1.json) <(jq --sort-keys . workflow0.json) | less -R`

4. Compare JSON files focusing only on commands starting with "o2-gpu":
   Filtering the entries where the command starts with "o2-gpu" and then comparing:
   `diff --side-by-side --left-column --color=always <(jq '.[] | select(.command | test("^o2-gpu"))' workflow1.json | jq --sort-keys .) <(jq '.[] | select(.command | test("^o2-gpu"))' workflow0.json | jq --sort-keys .) | less -R`
HELP_USAGE
}

# Show the overview as soon as the file is sourced.
init648