try llm diff with validation

smartcontractkit · Aug 12, 2024 · b674d5a · b674d5a
1 parent 4fe3a8f
commit b674d5a
Show file tree

Hide file tree

Showing 5 changed files with 162 additions and 43 deletions.
diff --git a/.github/workflows/solidity-foundry.yml b/.github/workflows/solidity-foundry.yml
@@ -401,46 +401,21 @@ jobs:
         env:
           OPEN_API_KEY: ${{ secrets.OPEN_AI_SLITHER_API_KEY }}
         run: |
+          echo "Current Slither reports:"
+          ls -al contracts/slither-reports-current
+          echo "Base Slither reports:"
+          ls -al contracts/slither-reports-base-ref
           for base_report in contracts/slither-reports-base-ref/*.md; do
             filename=$(basename "$base_report")
             current_report="contracts/slither-reports-current/$filename"
-            diff_report="contracts/slither-reports-current/${filename%.md}_diff.md"
-            if [ -f "$current_file" ]; then
-              report1=$(cat $base_report | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
-              report2=$(cat $current_report | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
-              openai_prompt=$(cat contracts/scripts/ci/prompt.md | sed 's/"/\\"/g' | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
-              openai_model="gpt-4o-mini"
-              openai_result=$(echo '{
-                "model": "'$openai_model'",
-                "temperature": 0.1,
-                "messages": [
-                  {
-                    "role": "system",
-                    "content": "'$openai_prompt' \nreport1:\n```'$report1'```\nreport2:\n```'$report2'```"
-                  }
-                ]
-                }' | envsubst | curl https://api.openai.com/v1/chat/completions \
-                              -w "%{http_code}" \
-                              -o prompt_response.json \
-                              -H "Content-Type: application/json" \
-                              -H "Authorization: Bearer $OPEN_API_KEY" \
-                              -d @-
-                )
-          
-              # throw error openai_result when is not 200
-              if [ "$openai_result" != '200' ]; then
-                echo "::error::OpenAI API call failed with status $openai_result: $(cat prompt_response.json)"
-                exit 1
-              fi
-          
-              # replace lines starting with ' -' (1space) with '  -' (2spaces)
-              response_content=$(cat prompt_response.json | jq -r '.choices[0].message.content')
-              echo "$response_content" | sed -e 's/^ -/  -/g' > $diff_report
+            new_issues_report="contracts/slither-reports-current/${filename%.md}_new_issues.md"
+            if [ -f "$current_report" ]; then
+              ./contracts/scripts/ci/find_slither_report_diff.sh "$base_report" "$current_report" "$new_issues_report" "contracts/scripts/ci/prompt-difference.md" "contracts/scripts/ci/prompt-validation.md"
           
               # Test the report the helper script just wrote. The variable is quoted so
               # that an empty value counts as "no report" instead of collapsing the test
               # to `[ -s ]`, which is always true.
               if [ -s "$new_issues_report" ]; then
-                awk 'NR==2{print "*This diff has been automatically generated by LLM model using two Slither reports. One based on `${{ github.base_ref}}` and another on `${{ github.sha }}` commits.*"}1' $diff_report > tmp.md && mv tmp.md $diff_report                                  
+                awk 'NR==2{print "*This new issues report has been automatically generated by LLM model using two Slither reports. One based on `${{ github.base_ref}}` and another on `${{ github.sha }}` commits.*"}1' $new_issues_report > tmp.md && mv tmp.md $new_issues_report                                  
                 echo "Replacing full Slither report with diff for $current_report"            
-                rm $current_report && mv $diff_report $current_report
+                rm $current_report && mv $new_issues_report $current_report
               else 
                 echo "No difference detected between $base_report and $current_report reports. Won't include any of them."
                 rm $current_report

diff --git a/contracts/scripts/ci/find_slither_report_diff.sh b/contracts/scripts/ci/find_slither_report_diff.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+if [[ "$#" -lt 4 ]]; then
+  >&2 echo "Generates a markdown file with diff in new issues detected by ChatGPT between two Slither reports."
+  >&2 echo "Usage: $0 <path-to-first-report> <path-to-second-report> <path-to-diff-report-output> <path-to-prompt> [path-to-validation-prompt]"
+  exit 1
+fi
+
+if [[ -z "${OPEN_API_KEY+x}" ]]; then
+  >&2 echo "OPEN_API_KEY is not set."
+  exit 1
+fi
+
+first_report_path=$1
+second_report_path=$2
+new_issues_report_path=$3
+report_prompt_path=$4
+if [[ "$#" -eq 5 ]]; then
+  validation_prompt_path=$5
+else
+  validation_prompt_path=""
+fi
+
+first_report_content=$(cat "$first_report_path" | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
+second_report_content=$(cat "$second_report_path" | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
+openai_prompt=$(cat "$report_prompt_path" | sed 's/"/\\"/g' | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
+openai_model="gpt-4o"
+openai_result=$(echo '{
+  "model": "'$openai_model'",
+  "temperature": 0.1,
+  "messages": [
+    {
+      "role": "system",
+      "content": "'$openai_prompt' \nreport1:\n```'$first_report_content'```\nreport2:\n```'$second_report_content'```"
+    }
+  ]
+}' | envsubst | curl https://api.openai.com/v1/chat/completions \
+              -w "%{http_code}" \
+              -o prompt_response.json \
+              -H "Content-Type: application/json" \
+              -H "Authorization: Bearer $OPEN_API_KEY" \
+              -d @-
+)
+
+# throw error openai_result when is not 200
+if [ "$openai_result" != '200' ]; then
+  echo "::error::OpenAI API call failed with status $openai_result: $(cat prompt_response.json)"
+  exit 1
+fi
+
+# replace lines starting with ' -' (1space) with '  -' (2spaces)
+response_content=$(cat prompt_response.json | jq -r '.choices[0].message.content')
+new_issues_report_content=$(echo "$response_content" | sed -e 's/^ -/  -/g')
+echo "$new_issues_report_content" > "$new_issues_report_path"
+
+if [[ -n "$validation_prompt_path" ]]; then
+  echo "::debug::Validating the diff report using the validation prompt"
+  report_input=$(echo "$new_issues_report_content" | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
+  validation_prompt_content=$(cat "$validation_prompt_path" | sed 's/"/\\"/g' | sed -E 's/\\+$//g' | sed -E 's/\\+ //g')
+  validation_result=$(echo '{
+    "model": "'$openai_model'",
+    "temperature": 0.1,
+    "messages": [
+      {
+        "role": "system",
+        "content": "'$validation_prompt_content' \nreport1:\n```'$first_report_content'```\nreport2:\n```'$second_report_content'```\nnew_issues:\n```'$report_input'```"
+      }
+    ]
+  }' | envsubst | curl https://api.openai.com/v1/chat/completions \
+                -w "%{http_code}" \
+                -o prompt_validation_response.json \
+                -H "Content-Type: application/json" \
+                -H "Authorization: Bearer $OPEN_API_KEY" \
+                -d @-
+  )
+
+  # throw error openai_result when is not 200
+  if [ "$validation_result" != '200' ]; then
+    echo "::error::OpenAI API call failed with status $validation_result: $(cat prompt_validation_response.json)"
+    exit 1
+  fi
+
+  # replace lines starting with ' -' (1space) with '  -' (2spaces)
+  response_content=$(cat prompt_validation_response.json | jq -r '.choices[0].message.content')
+
+  echo "$response_content" | sed -e 's/^ -/  -/g' >> "$new_issues_report_path"
+  echo "" >> "$new_issues_report_path"
+  echo "*Confidence rating presented above is an automatic validation (self-check) of the differences between two reports generated by ChatGPT ${openai_model} model*." >> "$new_issues_report_path"
+fi
diff --git a/contracts/scripts/ci/prompt.md → contracts/scripts/ci/prompt-difference.md b/contracts/scripts/ci/prompt.md → contracts/scripts/ci/prompt-difference.md
@@ -1,23 +1,19 @@
 You are a helpful expert data engineer with expertise in Blockchain and Decentralized Oracle Networks.
 
-Given two reports generated by Slither - a Solidity static analysis tool - provided at the bottom of the reply, your task is to help create a report for your peers with meaningful differences between both reports in order to decrease noise resulting from irrelevant changes to the report, by focusing on 2 primary topics: **New Issues** and **Resolved Issues**.
+Given two reports generated by Slither - a Solidity static analysis tool - provided at the bottom of the reply, your task is to help create a report for your peers with new issues introduced in the second report in order to decrease noise resulting from irrelevant changes to the report, by focusing on a single topic: **New Issues**.
 
 First report is provided under Heading 2 (##) called `report1` and is surrounded by triple backticks (```) to indicate the beginning and end of the report.
 Second report is provided under Heading 2 (##) called `report2` and is surrounded by triple backticks (```) to indicate the beginning and end of the report.
 
-First report is report generated by Slither using default branch of the code repository. Second report is report generated by Slither using a feature branch of the code repository. You want to help your peers understand the impact of changes they introduced in the pull request on the codebase.
+The first report was generated by Slither from the default branch of the code repository. The second report was generated by Slither from a feature branch of the code repository. You want to help your peers understand the impact of the changes they introduced in the pull request on the codebase and whether they introduced any new issues.
 
 **New Issues**
 
-Provide a bullet point summary of new issues that were introduced in the second report.
+Provide a bullet point summary of new issues that were introduced in the second report. If a given issue is not present in the first report, but is present in the second one, it is considered a new issue. If the count for a given issue type is higher in the second report than in the first one, it is considered a new issue.
 For each issue include original description text from the report together with severity level, issue ID, line number and a link to problematic line in the code.
 Group the issues by their type, which is defined as Heading 2 (##).
 
-**Resolved Issues**
-Provide a bullet point summary of resolved issues that were fixed in the second report.
-Do not include any other details about resolved issues.
-
-Output your response starting from**New Issues** and **Resolved Issues** in escaped, markdown text that can be sent as http body to API. Do not wrap output in code blocks.
+Output your response starting from **New Issues** in escaped, markdown text that can be sent as an HTTP body to an API. Do not wrap output in code blocks.
 Extract the name of the file from the first line of the report and title the new report with it in the following way: "# Slither diff report for: <file_name>"
 
-Format **New Issues** and **Resolved Issues** as Heading 2 using double sharp characters (##). Otherwise, do not include any another preamble and postamble to your answer.
+Format **New Issues** as Heading 2 using double sharp characters (##). Otherwise, do not include any other preamble or postamble in your answer.
diff --git a/contracts/scripts/ci/prompt-validation.md b/contracts/scripts/ci/prompt-validation.md
@@ -0,0 +1,31 @@
+You are a helpful expert data engineer with expertise in Blockchain and Decentralized Oracle Networks.
+
+At the bottom of the reply you will find two reports generated by Slither - a Solidity static analysis tool - and another report that contains new issues found in the second report.
+Your task is to evaluate how well that new issues report shows all new issues mentioned in the second Slither report and assert its completeness.
+Rate your confidence in the completeness of the new issues report on a scale from 1 to 5, where 1 means it's missing all new issues and 5 means that all new issues are present.
+
+First report is provided under Heading 2 (##) called `report1` and is surrounded by triple backticks (```) to indicate the beginning and end of the report.
+Second report is provided under Heading 2 (##) called `report2` and is surrounded by triple backticks (```) to indicate the beginning and end of the report.
+New issues report is provided under Heading 2 (##) called `new_issues` and is surrounded by triple backticks (```) to indicate the beginning and end of the report.
+
+Use the following steps to evaluate the new issues report:
+* each report begins with a summary with types of issues found and number of issues found for each type, called "# Summary for <file_name>"
+* group issues by type and count for each report and calculate the expected difference in number of issues for each type for each report
+* exclude all issue types for which the count is higher in the first report than in the second one
+* for each remaining issue type, compare the number of issues found in the new issues report with the expected difference
+* evaluate if the new issues report captures all new issues introduced in the second report
+
+Do not focus on:
+* the quality of the Slither reports themselves, but rather on whether all new issues from the second report are present in the new issues report
+* how well the new issues report is structured or written and how well it presents new issues
+
+It is crucial that you ignore all differences in the reports that are not related to new issues, such as resolved issues or issues whose count has decreased.
+
+If a given issue is not present in first report, but is present in the second one, it is considered a new issue. Similar behaviour is expected from the new issues report.
+If the count for given issue type is higher in the second report than in the first one, it is considered a new issue.
+
+Your report should include only a single section titled "Confidence level".
+Your evaluation of the completeness of the new issues report should be displayed as a Heading 3 using triple sharp characters (###). On a new line, add a brief explanation of the scale used, with its minimum and maximum possible values.
+
+Output your response as escaped, markdown text that can be sent as an HTTP body to an API. Do not wrap output in code blocks. Do not include any partial results or statistics regarding the number of new and resolved issues in any of the reports.
+Format **Confidence level** as Heading 2 using double sharp characters (##). Otherwise, do not include any other preamble or postamble in your answer.
diff --git a/contracts/src/v0.8/automation/v2_3/AutomationRegistrar2_3.sol b/contracts/src/v0.8/automation/v2_3/AutomationRegistrar2_3.sol
@@ -439,4 +439,30 @@ contract AutomationRegistrar2_3 is TypeAndVersionInterface, ConfirmedOwner, IERC
     }
     return false;
   }
+
+  /// @notice Calls `target` with `payload` and a gas allowance of `gasLimit`, but only when the
+  /// remaining gas is large enough to forward that allowance.
+  /// @dev The call sends no value and requests no return data. It is made without checking
+  /// whether `target` has code. When the gas check fails no call is made and both return values
+  /// keep their default `false`.
+  /// @param payload bytes passed to the call as call data
+  /// @param target address that is called
+  /// @param gasLimit gas passed to the call
+  /// @param gasForCallExactCheck gas set aside for the logic that runs after the `gas()` reading
+  /// @return success result of the call; false when the call was not attempted
+  /// @return sufficientGas true when the gas check passed and the call was attempted
+  function _callWithExactGasEvenIfTargetIsNoContract(
+    bytes memory payload,
+    address target,
+    uint256 gasLimit,
+    uint16 gasForCallExactCheck
+  ) internal returns (bool success, bool sufficientGas) {
+    assembly {
+      let g := gas()
+      // Compute g -= gasForCallExactCheck and check for underflow. We
+      // need the cushion since the logic following the above call to gas also
+      // costs gas which we cannot account for exactly. So the cushion is a
+      // conservative upper bound for the cost of this logic.
+      if iszero(lt(g, gasForCallExactCheck)) {
+        g := sub(g, gasForCallExactCheck)
+        // If g - g//64 <= gasLimit, we don't have enough gas. We subtract g//64 because of EIP-150.
+        if gt(sub(g, div(g, 64)), gasLimit) {
+          // Make the call, keeping its success flag and discarding return data. Note that we
+          // did not check whether a contract actually exists at the target address.
+          success := call(gasLimit, target, 0, add(payload, 0x20), mload(payload), 0x0, 0x0)
+          sufficientGas := true
+        }
+      }
+    }
+    return (success, sufficientGas);
+  }
 }