diff --git a/.github/fuzzy-ci-helpers/create_diff.ml b/.github/fuzzy-ci-helpers/create_diff.ml new file mode 100644 index 000000000..0ab15910d --- /dev/null +++ b/.github/fuzzy-ci-helpers/create_diff.ml @@ -0,0 +1,45 @@ +type read_result = Success | Eof + +let read_line_opt () = try Some (read_line ()) with End_of_file -> None +let delimiter1 = Sys.argv.(1) +let delimiter2 = Sys.argv.(2) +let diff_file = Sys.argv.(3) + +let rec loop () = + let rec stdin_to_file ~until file_oc = + match read_line_opt () with + | None -> Eof + | Some line -> + if String.equal line until then Success + else ( + output_string file_oc (line ^ "\n"); + stdin_to_file ~until file_oc) + in + let label1 = read_line_opt () in + let tmp1 = Filename.temp_file "tmp1" "" in + let oc = open_out tmp1 in + let read_result1 = stdin_to_file ~until:delimiter1 oc in + close_out oc; + let label2 = read_line_opt () in + let tmp2 = Filename.temp_file "tmp2" "" in + let oc = open_out tmp2 in + let read_result2 = stdin_to_file ~until:delimiter2 oc in + close_out oc; + match (label1, read_result1, label2, read_result2) with + | Some label1, Success, Some label2, Success -> + let diff_cmd = + Printf.sprintf + "diff -U 5 --label=\"%s\" --label=\"%s\" \"%s\" \"%s\" >> %s" label1 + label2 tmp1 tmp2 diff_file + in + let _ = Sys.command diff_cmd in + loop () + | Some _, Success, Some _, Eof -> + raise (Failure "EOF before reaching delimiter2 when reading second JSON.") + | Some _, Success, None, _ -> + raise (Failure "EOF before reaching delimiter2 when second label.") + | Some _, Eof, _, _ -> + raise (Failure "EOF before reaching delimiter1 when reading first JSON.") + | None, _, _, _ -> () + +let () = loop () diff --git a/.github/workflows/fuzzy-ci.yml b/.github/workflows/fuzzy-ci.yml new file mode 100644 index 000000000..d1f10cac0 --- /dev/null +++ b/.github/workflows/fuzzy-ci.yml @@ -0,0 +1,509 @@ +name: Fuzzy CI + +on: + pull_request: + branches: [ master ] + types: [ opened, synchronize, reopened, unlabeled, labeled ] + paths-ignore: + - '**.md' + - '**.txt' + - '.git*' + - 'doc/**' + - 'emacs/**' + - 'vim/**' + - '**/emacs-lint.yml' + - 'bench/**' + - 'upstream/**' + - 'tests/**' + +env: + # Artifact names need to be consistent across jobs: + BASE_BRANCH_ARTIFACT_NAME: base-branch-data-${{ github.event.pull_request.base.sha }}-pr${{ github.event.pull_request.number }} + MERGE_BRANCH_ARTIFACT_NAME: merge-branch-data-${{ github.event.pull_request.base.sha }}-${{ github.event.pull_request.head.sha }}-pr${{ github.event.pull_request.number }} + DIFF_ARTIFACT_NAME: diff-${{ github.event.pull_request.base.sha }}-${{ github.event.pull_request.head.sha }} + + # File names also need to be consistant across jobs: + FULL_DIFF_FILE: full_responses.diff + DISTILLED_DIFF_FILE: distilled_data.diff + # Note: FULL_DATA_FILE and DISTILLED_DATA_FILE need to be the file names of the files generated by `merl-an behavior` + FULL_DATA_FILE: full_responses.json + DISTILLED_DATA_FILE: distilled_data.json + + # GitHub API related short-hands: + GH_API_COMMENTS: ${{ github.event.pull_request.comments_url }} + GH_API_LABELS: ${{ github.event.pull_request.issue_url }}/labels + GH_API_ARTIFACTS: ${{ github.event.pull_request.base.repo.url }}/actions/artifacts + + # URL short-hands + ACTIONS_RUNS_ENDPOINT: ${{ github.event.repository.html_url }}/actions/runs + CURRENT_ACTION_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }} + + # Irmin version and merl-an version need to be consistent for reproducibility (Irmin is used as the test code base to test `ocamlmerlin` on) + IRMIN_VERSION: 3.9.0 + # TODO: Release merl-an and install a certain version instead of pinning it to a certain commit + MERL_AN_SHA: 1643fb7a9958379fb4ed8d7c5169146aaa88f5b7 + + # The compiler version used on the respective branches. It also needs to form part of Irmin's build cache key. + # Bump either of these whenever the compiler version is bumped on either of the two branches. + merge_branch_COMPILER_VERSION: ocaml-base-compiler.4.14.1 + base_branch_COMPILER_VERSION: ocaml-base-compiler.4.14.1 + +jobs: + data: + name: Generate data + runs-on: ubuntu-22.04 + if: > + github.event.action == 'opened' || + github.event.action == 'synchronize' || + github.event.action == 'reopened' || + ( + github.event.action == 'unlabeled' && + github.event.label.name == 'fuzzy-diff-looks-good' + ) + env: + data_dir: data + strategy: + matrix: + commit: ["merge_branch", "base_branch"] + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Checking out ${{ matrix.commit }} + env: + base_branch_sha: ${{ github.event.pull_request.base.sha }} + merge_branch_sha: ${{ github.sha }} + run: | + sha=$${{ matrix.commit }}_sha + echo "Check out $sha" + git checkout $sha + + - name: Get desired compiler version + id: compiler + run: | + v=$${{ matrix.commit }}_COMPILER_VERSION + echo "version=$v" | tee -a $GITHUB_OUTPUT + + - name: Install OCaml + uses: ocaml/setup-ocaml@v2 + with: + ocaml-compiler: ${{ steps.compiler.outputs.version }} + dune-cache: true + + - name: Install merlin dependencies + run: | + opam pin menhirLib 20201216 --no-action + opam install . --deps-only --yes + + - name: Install merlin + run: | + # Running `subst` to have the current commit in the data produced by `merl-an` + opam exec -- dune subst + opam exec -- dune build -p merlin-lib,dot-merlin-reader,merlin + opam exec -- dune install -p merlin-lib,dot-merlin-reader,merlin + + - name: Pull irmin and its deps from cache if possible + uses: actions/cache@v3 + id: irmin-cache + with: + path: irmin/ + key: os${{ runner.os }}+arch${{ runner.arch }}+${{ hashFiles('fuzzy-ci-helpers/irmin.3.9.0.opam.locked') }}+${{ env.IRMIN_VERSION }}+${{ steps.compiler.outputs.version }} + + - name: Download Irmin tarball + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: | + wget https://github.com/mirage/irmin/releases/download/$IRMIN_VERSION/irmin-$IRMIN_VERSION.tbz + + - name: Create irmin dir + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: mkdir -p irmin + + - name: Decompress Irmin tarball + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: tar xvf irmin-$IRMIN_VERSION.tbz -C irmin --strip-components=1 + + - name: Get Irmin's lock files + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: | + cp .github/fuzzy-ci-helpers/irmin.3.9.0.opam.locked irmin/irmin.opam.locked + + - name: Install opam monorepo + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: opam install opam-monorepo --yes + + - name: Pull in Irmin's dependencies + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: | + git checkout ${{ github.sha }} + opam monorepo pull --lockfile=irmin.opam.locked --yes + working-directory: irmin + + - name: Prune Irmin + if: steps.irmin-cache.outputs.cache-hit != 'true' + run: | + rm -r examples/ bench/ + find test/ -mindepth 1 -maxdepth 1 -type d -not -name 'irmin-pack' -exec rm -r {} \; + find src/ -mindepth 1 -maxdepth 1 -type d \ + -not -name 'irmin-pack' \ + -not -name 'irmin' \ + -not -name 'irmin-tezos' \ + -not -name ppx_irmin \ + -not -name irmin_test \ + -not -name irmin-test \ + -exec rm -r {} \; + working-directory: irmin + + - name: Build Irmin + run: | + opam exec -- dune build @check + working-directory: irmin + + - name: Pull merl-an from cache if possible + uses: actions/cache@v3 + id: merl-an-cache + with: + path: /usr/local/bin/merl-an + key: os${{ runner.os }}+arch${{ runner.arch }}+merl-an-sha$MERL_AN_SHA + + - name: Install merl-an + if: steps.merl-an-cache.outputs.cache-hit != 'true' + run: opam pin -y merl-an https://github.com/pitag-ha/merl-an.git#$MERL_AN_SHA + + - name: Add merl-an to /usr/local/bin/ + if: steps.merl-an-cache.outputs.cache-hit != 'true' + run: opam exec -- cp $GITHUB_WORKSPACE/_opam/bin/merl-an /usr/local/bin/merl-an + + - name: Create data set of Merlin responses + run: | + # Note: The parameters with most influence on the execution time are + # `--sample-size`: Number of samples per file defined by `--project` (and per local query). + # `--project`: List of dirs/files to create samples on. In the case of a dirs, all ml(i) files recursively in the dir are used. + # `--queries`: The `ocamlmerlin` queries that are being run. + opam exec -- merl-an behavior \ + --queries=type-enclosing,occurrences,locate,complete-prefix,errors \ + --sample-size=30 \ + --data=${{ env.data_dir }} \ + --merlin=ocamlmerlin \ + --project=irmin/src/irmin,irmin/src/irmin-pack,irmin/test/irmin-pack + + - name: Remove varying components from data + run: | + # TODO: This could be done on the `merl-an` side + jq '.responses |= map(del(.heap_mbytes, .timings, .cache))' \ + ${{ env.data_dir }}/$FULL_DATA_FILE > temp.json && \ + mv temp.json ${{ env.data_dir }}/$FULL_DATA_FILE + + - name: Create name for data artifact + id: artifact_name + env: + base_branch_artifact_name: ${{ env.BASE_BRANCH_ARTIFACT_NAME }} + merge_branch_artifact_name: ${{ env. MERGE_BRANCH_ARTIFACT_NAME }} + run: echo "name=$${{ matrix.commit }}_artifact_name" >> $GITHUB_OUTPUT + + - name: Upload data + uses: actions/upload-artifact@v3 + with: + name: ${{ steps.artifact_name.outputs.name }} + path: ${{ env.data_dir }} + + - name: Compile diff tool + if: ${{ matrix.commit == 'merge_branch' }} + run: | + # Taking advantage that ocamlopt is installed on this runner: compile the diff tool here and share it with the next job where it's needed. + # All GH runners are hosted on x86 machines and all jobs in this workflow declare the same OS, so this should workTM. + opam exec -- ocamlopt -o create_diff .github/fuzzy-ci-helpers/create_diff.ml + + - name: Upload diff tool + if: ${{ matrix.commit == 'merge_branch' }} + uses: actions/upload-artifact@v3 + with: + name: diff_tool + path: create_diff + + + + diff: + name: Generate diffs + runs-on: ubuntu-22.04 + outputs: + diff_exits: ${{steps.full_responses_diff.outputs.diff_exists}} + needs: data + env: + base_data_dir: base_data + merge_data_dir: merge_data + diff_dir: diff + steps: + - name: Download base branch data + uses: actions/download-artifact@v3 + with: + name: ${{ env.BASE_BRANCH_ARTIFACT_NAME }} + path: ${{ env.base_data_dir }} + + - name: Download merge branch data + uses: actions/download-artifact@v3 + with: + name: ${{ env.MERGE_BRANCH_ARTIFACT_NAME }} + path: ${{ env.merge_data_dir }} + + - name: Create diff dir + run: mkdir -p "$diff_dir" + + - name: Download diff tool + uses: actions/download-artifact@v3 + with: + name: diff_tool + + - name: Give diff tool execute permissions + run: chmod +x create_diff + + - name: Generate full responses diff + id: full_responses_diff + run: | + jq -r -n \ + --slurpfile data1 "$base_data_dir/$FULL_DATA_FILE" \ + --slurpfile data2 "$merge_data_dir/$FULL_DATA_FILE" \ + 'def process_json($branch; $data): + ($branch + ": " + $data.cmd + " (id=" + ($data.sample_id | tostring) + ")"), $data; + range($data1|length) as $i | + process_json("base branch"; $data1[$i]), + "--input-separator--", + process_json("merge branch"; $data2[$i]), + "--diff-cmd-separator--"' \ + | ./create_diff "--input-separator--" "--diff-cmd-separator--" "$diff_dir/$FULL_DIFF_FILE" + if [ -s "$diff_dir/$FULL_DIFF_FILE" ]; then + echo "diff_exists=true" | tee -a $GITHUB_OUTPUT + else + echo "diff_exists=false" | tee -a $GITHUB_OUTPUT + fi + + - name: Generate distilled data diff + # If there's no full reponses diff, there also won't be a distilled data diff + if: ${{ steps.full_responses_diff.outputs.diff_exists == 'true' }} + run: | + jq -r -n \ + --slurpfile data1 "$base_data_dir/$DISTILLED_DATA_FILE" \ + --slurpfile data2 "$merge_data_dir/$DISTILLED_DATA_FILE" \ + 'def process_json($branch; $data): + ($branch + ": " + $data.cmd + " (id=" + ($data.sample_id | tostring) + ")"), $data; + range($data1|length) as $i | + process_json("base branch"; $data1[$i]), + "--input-separator--", + process_json("merge branch"; $data2[$i]), + "--diff-cmd-separator--"' \ + | ./create_diff "--input-separator--" "--diff-cmd-separator--" "$diff_dir/$DISTILLED_DIFF_FILE" + + - name: Upload diff(s) + uses: actions/upload-artifact@v3 + with: + name: ${{ env.DIFF_ARTIFACT_NAME }} + path: ${{ env.diff_dir }} + + + + output: + name: Evaluate diffs + runs-on: ubuntu-22.04 + needs: diff + env: + earlier_diff_was_approved: ${{ contains(github.event.pull_request.labels.*.name, 'fuzzy-diff-looks-good') }} + current_diff_exists: ${{ needs.diff.outputs.diff_exits }} + diff_dir: ${{ needs.artifact_names.outputs.diff_dir }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download current diff(s) + if: ${{ env.current_diff_exists == 'true' }} + uses: actions/download-artifact@v3 + with: + name: ${{ env.DIFF_ARTIFACT_NAME }} + + - name: Retreive hash of approved diff + if: ${{ env.earlier_diff_was_approved == 'true' }} + id: approved_diff + run: | + msg_start=$(head -c 50 .github/fuzzy-ci-helpers/msg.txt) + + next_page_endpoint="$GH_API_COMMENTS?per_page=100&page=1" + latest_comment="{}" + + while [ -n "$next_page_endpoint" ]; do + latest_comment=$( + curl -s -D "headers.txt" -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" "$next_page_endpoint" | + jq --arg msg_start "$msg_start" --argjson latest "{}" ' + map( + select( + (.body | startswith($msg_start)) and .user.login == "github-actions[bot]" + ) + ) + [$latest] | max_by(.created_at)' + ) + + next_page_endpoint=$( + rg '^link:' headers.txt | + tr ',' '\n' | + rg 'rel="next"' | + cut -d'<' -f2 | + cut -d'>' -f1 + ) + done + + hash=$(echo "$latest_comment" | jq '.body' -r | grep '256-sha' | awk '{print $NF}') + echo "hash='$hash'" | tee -a $GITHUB_OUTPUT + + - name: Analyze current diff + id: current_diff + run: | + hash=$(sha256sum "$FULL_DIFF_FILE" | awk '{print $1}') + echo "hash='$hash'" | tee -a $GITHUB_OUTPUT + + - name: Write instruction to delete PR label + # When this workflow is triggered by a PR from a fork, it doesn't have + # the permissions to delete PR labels. Instead, we forward the + # instruction to delete the label to fuzzy-ci-privileged.yml. + if: ${{ env.earlier_diff_was_approved == 'true' && steps.approved_diff.outputs.hash != steps.current_diff.outputs.hash }} + run: | + echo ${{ steps.approved_diff.outputs.hash }} + echo ${{ steps.current_diff.outputs.hash }} + mkdir -p ./forward + jq -n \ + --arg instruction "delete_label" \ + --arg endpoint "$GH_API_LABELS" \ + '{instruction: $instruction, endpoint: $endpoint}' > ./forward/instruction.json + + - name: Upload instruction to delete label + if: ${{ env.earlier_diff_was_approved == 'true' && steps.approved_diff.outputs.hash != steps.current_diff.outputs.hash }} + uses: actions/upload-artifact@v3 + with: + name: forwarded_instructions + path: forward/ + + - name: Return + id: return + env: + github_api_labels_url: ${{ github.event.pull_request.base.repo.url }}/issues/${{ github.event.pull_request.number }}/labels + run: | + print_head_of_diffs () { + echo "--------beginning of full responses diff head--------" + head -n 100 "$FULL_DIFF_FILE" + echo "--------end of full responses diff head--------" + echo "--------beginning of distilled data diff head--------" + head -n 100 "$DISTILLED_DIFF_FILE" + echo "--------end of distilled data diff head--------" + } + + # FIXME (?): Are nested conditionals always so ugly in Bash, or is there a better way? Option types and the possibility to match would help a lot. + LABEL_NAME=$(cat .github/fuzzy-ci-helpers/label_name.txt) + if $earlier_diff_was_approved; then + echo "Earlier diff was approved." + if [ ${{ steps.current_diff.outputs.hash }} == ${{ steps.approved_diff.outputs.hash }} ]; then + echo "This diff has been approved earlier. Everything ok." + exit 0 + else + print_head_of_diffs + printf "The diff has changed since it was approved. So I'm removing the $LABEL_NAME label. If the new diff looks good, please set the label again.\n\ + There's a head of the new diffs printed above. The whole diffs can be downloaded from $CURRENT_ACTION_URL .\n\ + Previous sha256: ${{ steps.approved_diff.outputs.hash }}\n\ + Current sha256: ${{ steps.current_diff.outputs.hash }}" + echo "delete_label=true" >> $GITHUB_OUTPUT + exit 1 + fi + else + if $current_diff_exists; then + print_head_of_diffs + printf "There's a head of the diffs printed above. The diffs can be downloaded from $CURRENT_ACTION_URL .\nIf it looks good, please set the $LABEL_NAME label on the PR." + exit 1 + else + echo "No diff. All good." + exit 0 + fi + fi + + + approve: + name: Approve diff + if: > + github.event_name == 'pull_request' && + github.event.action == 'labeled' && + github.event.label.name == 'fuzzy-diff-looks-good' + runs-on: ubuntu-22.04 + steps: + - name: Retreive diff artifact meta-data + id: diff_metadata + run: | + all_artifacts=$(curl -sSL "$GH_API_ARTIFACTS") + diff_artifact=$(echo "$all_artifacts" | jq "first(.artifacts[] | select(.name == \"$DIFF_ARTIFACT_NAME\") )") + id=$(echo "$diff_artifact" | jq ".id") + echo "id=$id" | tee -a $GITHUB_OUTPUT + workflow_run=$(echo "$diff_artifact" | jq ".workflow_run | .id") + echo "workflow_run=$workflow_run" | tee -a $GITHUB_OUTPUT + if [ -z $id ]; then + echo "exists=false" | tee -a $GITHUB_OUTPUT + else + echo "exists=true" | tee -a $GITHUB_OUTPUT + fi + + - name: Write instruction to delete PR label + # When this workflow is triggered by a PR from a fork, it doesn't have + # the permissions to delete PR labels. Instead, we forward the + # instruction to delete the label to fuzzy-ci-privileged.yml. + if: ${{ steps.diff_metadata.outputs.exists == 'false' }} + run: | + mkdir -p ./forward + jq -n \ + --arg instruction "delete_label" \ + --arg endpoint "$GH_API_LABELS" \ + '{instruction: $instruction, endpoint: $endpoint}' > ./forward/instruction.json + + - name: Upload instruction to delete label + if: ${{ steps.diff_metadata.outputs.exists == 'false' }} + uses: actions/upload-artifact@v3 + with: + name: forwarded_instructions + path: forward/ + + - name: Fail due to diff not existing yet + if: ${{ steps.diff_metadata.outputs.exists == 'false' }} + run: | + printf "You seem to have tried to approve a diff that doesn't exist yet.\nWait for the diff to have been generated and then try again." + exit 1 + + - name: Download diff + env: + id: ${{ steps.diff_metadata.outputs.id }} + run: | + # Doing this manually, since actions/download-artifact only works on the same workflow run on which the artifact was uploaded + curl -sSLO -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" "$GH_API_ARTIFACTS/$id/zip" -D headers.txt + + - name: Unzip downloaded diff + run: | + unzip zip || (echo "Download of diff artifact failed" && cat headers.txt && cat zip && exit 1) + + - name: Compute full responses diff hash + id: diff_hash + run: | + hash=$(sha256sum "$FULL_DIFF_FILE" | awk '{print $1}') + echo "hash=$hash" | tee -a $GITHUB_OUTPUT + + - name: Write instruction to comment on PR + # When this workflow is triggered by a PR from a fork, it doesn't have + # the permissions to comment on PRs. Instead, we forward the + # instruction to comment on the PR to fuzzy-ci-privileged.yml. + env: + approved_diffs_workflow_run: ${{ steps.diff_metadata.outputs.workflow_run }} + approved_diffs_hash: ${{ steps.diff_hash.outputs.hash }} + run: | + mkdir -p ./forward + jq -n \ + --arg instruction "comment" \ + --arg endpoint "$GH_API_COMMENTS" \ + --arg artifacts_url "$ACTIONS_RUNS_ENDPOINT/$approved_diffs_workflow_run" \ + --arg hash "$approved_diffs_hash" \ + '{instruction: $instruction, endpoint: $endpoint, artifacts_url: $artifacts_url, hash: $hash}' > ./forward/instruction.json + + - name: Upload instruction to comment on PR + uses: actions/upload-artifact@v3 + with: + name: forwarded_instructions + path: forward/ diff --git a/CHANGES.md b/CHANGES.md index d7e1121fe..67bccef0f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,7 @@ merlin NEXT_VERSION + merlin binary - Add a "heap_mbytes" field to Merlin server responses to report heap usage (#1717) - Add cache stats to telemetry (#1711) - + - Add a query_num field to the `ocamlmerlin` responses to detect server crashes (#1716) merlin 4.13 ===========