Skip to content

Ignore SIGPIPE in the Merlin server process #92

Ignore SIGPIPE in the Merlin server process

Ignore SIGPIPE in the Merlin server process #92

Workflow file for this run

# Workflow identity and trigger.
name: Fuzzy CI
on:
  pull_request:
    branches:
      - master
    # `labeled` / `unlabeled` are listed in addition to the usual PR events
    # because jobs in this workflow react to the 'fuzzy-diff-looks-good' label
    # being set on or removed from the PR.
    types:
      - opened
      - synchronize
      - reopened
      - unlabeled
      - labeled
    # Path patterns excluded from triggering this workflow.
    paths-ignore:
      - '**.md'
      - '**.txt'
      - '.git*'
      - 'doc/**'
      - 'emacs/**'
      - 'vim/**'
      - '**/emacs-lint.yml'
      - 'bench/**'
      - 'upstream/**'
      - 'tests/**'
# Workflow-wide environment, visible to every job and step.
env:
  # Artifact names need to be consistent across jobs.
  BASE_BRANCH_ARTIFACT_NAME: base-branch-data-${{ github.event.pull_request.base.sha }}-pr${{ github.event.pull_request.number }}
  MERGE_BRANCH_ARTIFACT_NAME: merge-branch-data-${{ github.event.pull_request.base.sha }}-${{ github.event.pull_request.head.sha }}-pr${{ github.event.pull_request.number }}
  DIFF_ARTIFACT_NAME: diff-${{ github.event.pull_request.base.sha }}-${{ github.event.pull_request.head.sha }}

  # File names also need to be consistent across jobs.
  FULL_DIFF_FILE: full_responses.diff
  DISTILLED_DIFF_FILE: distilled_data.diff
  # Note: FULL_DATA_FILE and DISTILLED_DATA_FILE must be the names of the
  # files generated by `merl-an behavior`.
  FULL_DATA_FILE: full_responses.json
  DISTILLED_DATA_FILE: distilled_data.json

  # GitHub API related short-hands.
  GH_API_COMMENTS: ${{ github.event.pull_request.comments_url }}
  GH_API_LABELS: ${{ github.event.pull_request.issue_url }}/labels
  GH_API_ARTIFACTS: ${{ github.event.pull_request.base.repo.url }}/actions/artifacts

  # URL short-hands.
  ACTIONS_RUNS_ENDPOINT: ${{ github.event.repository.html_url }}/actions/runs
  CURRENT_ACTION_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}

  # The Irmin version and the merl-an version need to be fixed for
  # reproducibility (Irmin is the code base `ocamlmerlin` is tested on).
  IRMIN_VERSION: '3.9.0'
  # TODO: Release merl-an and install a certain version instead of pinning it
  # to a certain commit.
  MERL_AN_SHA: '1643fb7a9958379fb4ed8d7c5169146aaa88f5b7'

  # The compiler version used on the respective branches. It also needs to
  # form part of Irmin's build cache key.
  # Bump either of these whenever the compiler version is bumped on either of
  # the two branches.
  # The `<matrix.commit>_COMPILER_VERSION` spelling is load-bearing: the
  # variable name is assembled from the matrix value at run time.
  merge_branch_COMPILER_VERSION: 'ocaml-base-compiler.4.14.1'
  base_branch_COMPILER_VERSION: 'ocaml-base-compiler.4.14.1'
jobs:
  # ---------------------------------------------------------------------------
  # data: for each matrix entry (PR merge commit and PR base commit) build
  # Merlin at that commit, run `merl-an behavior` against a pruned Irmin tree
  # and upload the resulting data directory as an artifact. The merge_branch
  # entry additionally compiles and uploads the diff tool used by `diff`.
  # ---------------------------------------------------------------------------
  data:
    name: Generate data
    runs-on: ubuntu-22.04
    # Runs for new/updated PRs, and again when the approval label is removed.
    if: >
      github.event.action == 'opened' ||
      github.event.action == 'synchronize' ||
      github.event.action == 'reopened' ||
      (
      github.event.action == 'unlabeled' &&
      github.event.label.name == 'fuzzy-diff-looks-good'
      )
    env:
      data_dir: data
    strategy:
      matrix:
        commit: ["merge_branch", "base_branch"]
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # The matrix expression is substituted before the shell runs, so the
      # script expands the env var `<matrix.commit>_sha` defined on this step.
      - name: Checking out ${{ matrix.commit }}
        env:
          base_branch_sha: ${{ github.event.pull_request.base.sha }}
          merge_branch_sha: ${{ github.sha }}
        run: |
          sha=$${{ matrix.commit }}_sha
          echo "Check out $sha"
          git checkout $sha
      # Same trick as above, resolving the workflow-level
      # `<matrix.commit>_COMPILER_VERSION` variable.
      - name: Get desired compiler version
        id: compiler
        run: |
          v=$${{ matrix.commit }}_COMPILER_VERSION
          echo "version=$v" | tee -a $GITHUB_OUTPUT
      - name: Install OCaml
        uses: ocaml/setup-ocaml@v2
        with:
          ocaml-compiler: ${{ steps.compiler.outputs.version }}
          dune-cache: true
      - name: Install merlin dependencies
        run: |
          opam pin menhirLib 20201216 --no-action
          opam install . --deps-only --yes
      - name: Install merlin
        run: |
          # Running `subst` to have the current commit in the data produced by `merl-an`
          opam exec -- dune subst
          opam exec -- dune build -p merlin-lib,dot-merlin-reader,merlin
          opam exec -- dune install -p merlin-lib,dot-merlin-reader,merlin
      # The key covers OS, architecture, Irmin's lock file, the Irmin version
      # and the compiler version. The lock file path is the same one that the
      # "Get Irmin's lock files" step copies from.
      - name: Pull irmin and its deps from cache if possible
        uses: actions/cache@v3
        id: irmin-cache
        with:
          path: irmin/
          key: os${{ runner.os }}+arch${{ runner.arch }}+${{ hashFiles('.github/fuzzy-ci-helpers/irmin.3.9.0.opam.locked') }}+${{ env.IRMIN_VERSION }}+${{ steps.compiler.outputs.version }}
      - name: Download Irmin tarball
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        run: |
          wget https://github.com/mirage/irmin/releases/download/$IRMIN_VERSION/irmin-$IRMIN_VERSION.tbz
      - name: Create irmin dir
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        run: mkdir -p irmin
      - name: Decompress Irmin tarball
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        run: tar xvf irmin-$IRMIN_VERSION.tbz -C irmin --strip-components=1
      - name: Get Irmin's lock files
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        run: |
          cp .github/fuzzy-ci-helpers/irmin.3.9.0.opam.locked irmin/irmin.opam.locked
      - name: Install opam monorepo
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        run: opam install opam-monorepo --yes
      # NOTE(review): irmin/ is unpacked from a release tarball above, so the
      # `git checkout` below presumably acts on the enclosing Merlin checkout
      # rather than on Irmin — confirm that this is intended.
      - name: Pull in Irmin's dependencies
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        working-directory: irmin
        run: |
          git checkout ${{ github.sha }}
          opam monorepo pull --lockfile=irmin.opam.locked --yes
      # Keeps only the parts of Irmin that the analysis below is pointed at
      # (plus the listed sibling packages) and removes the rest.
      - name: Prune Irmin
        if: steps.irmin-cache.outputs.cache-hit != 'true'
        working-directory: irmin
        run: |
          rm -r examples/ bench/
          find test/ -mindepth 1 -maxdepth 1 -type d -not -name 'irmin-pack' -exec rm -r {} \;
          find src/ -mindepth 1 -maxdepth 1 -type d \
            -not -name 'irmin-pack' \
            -not -name 'irmin' \
            -not -name 'irmin-tezos' \
            -not -name ppx_irmin \
            -not -name irmin_test \
            -not -name irmin-test \
            -exec rm -r {} \;
      # Runs unconditionally, i.e. also when irmin/ was restored from cache.
      - name: Build Irmin
        working-directory: irmin
        run: |
          opam exec -- dune build @check
      # `with:` inputs are not processed by a shell, so the pinned commit has
      # to be spliced in via the `env` context; a literal `$MERL_AN_SHA` would
      # yield the same key for every pinned commit.
      - name: Pull merl-an from cache if possible
        uses: actions/cache@v3
        id: merl-an-cache
        with:
          path: /usr/local/bin/merl-an
          key: os${{ runner.os }}+arch${{ runner.arch }}+merl-an-sha${{ env.MERL_AN_SHA }}
      - name: Install merl-an
        if: steps.merl-an-cache.outputs.cache-hit != 'true'
        run: opam pin -y merl-an https://github.com/pitag-ha/merl-an.git#$MERL_AN_SHA
      - name: Add merl-an to /usr/local/bin/
        if: steps.merl-an-cache.outputs.cache-hit != 'true'
        run: opam exec -- cp $GITHUB_WORKSPACE/_opam/bin/merl-an /usr/local/bin/merl-an
      - name: Create data set of Merlin responses
        run: |
          # Note: The parameters with most influence on the execution time are
          # `--sample-size`: Number of samples per file defined by `--project` (and per local query).
          # `--project`: List of dirs/files to create samples on. In the case of a dir, all ml(i) files recursively in the dir are used.
          # `--queries`: The `ocamlmerlin` queries that are being run.
          opam exec -- merl-an behavior \
            --queries=type-enclosing,occurrences,locate,complete-prefix,errors \
            --sample-size=30 \
            --data=${{ env.data_dir }} \
            --merlin=ocamlmerlin \
            --project=irmin/src/irmin,irmin/src/irmin-pack,irmin/test/irmin-pack
      # Drops the per-response fields `heap_mbytes`, `timings` and `cache` so
      # that they do not show up in the diff between the two branches.
      - name: Remove varying components from data
        run: |
          # TODO: This could be done on the `merl-an` side
          jq '.responses |= map(del(.heap_mbytes, .timings, .cache))' \
            ${{ env.data_dir }}/$FULL_DATA_FILE > temp.json && \
            mv temp.json ${{ env.data_dir }}/$FULL_DATA_FILE
      # Picks the artifact name that belongs to the current matrix entry by
      # expanding the env var `<matrix.commit>_artifact_name`.
      - name: Create name for data artifact
        id: artifact_name
        env:
          base_branch_artifact_name: ${{ env.BASE_BRANCH_ARTIFACT_NAME }}
          merge_branch_artifact_name: ${{ env.MERGE_BRANCH_ARTIFACT_NAME }}
        run: echo "name=$${{ matrix.commit }}_artifact_name" >> $GITHUB_OUTPUT
      - name: Upload data
        uses: actions/upload-artifact@v3
        with:
          name: ${{ steps.artifact_name.outputs.name }}
          path: ${{ env.data_dir }}
      - name: Compile diff tool
        if: ${{ matrix.commit == 'merge_branch' }}
        run: |
          # Taking advantage that ocamlopt is installed on this runner: compile the diff tool here and share it with the next job where it's needed.
          # All GH runners are hosted on x86 machines and all jobs in this workflow declare the same OS, so this should work(TM).
          opam exec -- ocamlopt -o create_diff .github/fuzzy-ci-helpers/create_diff.ml
      - name: Upload diff tool
        if: ${{ matrix.commit == 'merge_branch' }}
        uses: actions/upload-artifact@v3
        with:
          name: diff_tool
          path: create_diff

  # ---------------------------------------------------------------------------
  # diff: download both data sets and the diff tool, write the full-responses
  # diff (and, if that one is non-empty, the distilled-data diff) and upload
  # the diff directory. Exposes `diff_exists` ('true' / 'false') to `output`.
  # ---------------------------------------------------------------------------
  diff:
    name: Generate diffs
    runs-on: ubuntu-22.04
    needs: data
    outputs:
      diff_exists: ${{ steps.full_responses_diff.outputs.diff_exists }}
    env:
      base_data_dir: base_data
      merge_data_dir: merge_data
      diff_dir: diff
    steps:
      - name: Download base branch data
        uses: actions/download-artifact@v3
        with:
          name: ${{ env.BASE_BRANCH_ARTIFACT_NAME }}
          path: ${{ env.base_data_dir }}
      - name: Download merge branch data
        uses: actions/download-artifact@v3
        with:
          name: ${{ env.MERGE_BRANCH_ARTIFACT_NAME }}
          path: ${{ env.merge_data_dir }}
      - name: Create diff dir
        run: mkdir -p "$diff_dir"
      - name: Download diff tool
        uses: actions/download-artifact@v3
        with:
          name: diff_tool
      - name: Give diff tool execute permissions
        run: chmod +x create_diff
      # jq pairs up entry i of the base data with entry i of the merge data,
      # prefixes each with a "<branch>: <cmd> (id=<sample_id>)" header and
      # separates them with the two marker strings handed to `create_diff`.
      # A non-empty diff file sets the `diff_exists` step output to 'true'.
      - name: Generate full responses diff
        id: full_responses_diff
        run: |
          jq -r -n \
            --slurpfile data1 "$base_data_dir/$FULL_DATA_FILE" \
            --slurpfile data2 "$merge_data_dir/$FULL_DATA_FILE" \
            'def process_json($branch; $data):
               ($branch + ": " + $data.cmd + " (id=" + ($data.sample_id | tostring) + ")"), $data;
             range($data1|length) as $i |
             process_json("base branch"; $data1[$i]),
             "--input-separator--",
             process_json("merge branch"; $data2[$i]),
             "--diff-cmd-separator--"' \
            | ./create_diff "--input-separator--" "--diff-cmd-separator--" "$diff_dir/$FULL_DIFF_FILE"
          if [ -s "$diff_dir/$FULL_DIFF_FILE" ]; then
            echo "diff_exists=true" | tee -a $GITHUB_OUTPUT
          else
            echo "diff_exists=false" | tee -a $GITHUB_OUTPUT
          fi
      - name: Generate distilled data diff
        # If there's no full responses diff, there also won't be a distilled data diff
        if: ${{ steps.full_responses_diff.outputs.diff_exists == 'true' }}
        run: |
          jq -r -n \
            --slurpfile data1 "$base_data_dir/$DISTILLED_DATA_FILE" \
            --slurpfile data2 "$merge_data_dir/$DISTILLED_DATA_FILE" \
            'def process_json($branch; $data):
               ($branch + ": " + $data.cmd + " (id=" + ($data.sample_id | tostring) + ")"), $data;
             range($data1|length) as $i |
             process_json("base branch"; $data1[$i]),
             "--input-separator--",
             process_json("merge branch"; $data2[$i]),
             "--diff-cmd-separator--"' \
            | ./create_diff "--input-separator--" "--diff-cmd-separator--" "$diff_dir/$DISTILLED_DIFF_FILE"
      - name: Upload diff(s)
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.DIFF_ARTIFACT_NAME }}
          path: ${{ env.diff_dir }}

  # ---------------------------------------------------------------------------
  # output: decide whether the run passes. It passes when there is no diff or
  # when the current diff's hash equals the hash recorded for the approved
  # diff; otherwise it prints the head of the diffs and fails. If an approved
  # diff has changed, an instruction to delete the label is uploaded.
  # ---------------------------------------------------------------------------
  output:
    name: Evaluate diffs
    runs-on: ubuntu-22.04
    needs: diff
    env:
      # Both hold the strings 'true' / 'false'; the "Return" script executes
      # them as the shell commands `true` / `false`.
      earlier_diff_was_approved: ${{ contains(github.event.pull_request.labels.*.name, 'fuzzy-diff-looks-good') }}
      current_diff_exists: ${{ needs.diff.outputs.diff_exists }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # No `path` is given; later steps read the diff files by bare file name.
      - name: Download current diff(s)
        if: ${{ env.current_diff_exists == 'true' }}
        uses: actions/download-artifact@v3
        with:
          name: ${{ env.DIFF_ARTIFACT_NAME }}
      # Walks all pages of the PR's comments and keeps the newest comment by
      # github-actions[bot] whose body starts with the first 50 bytes of
      # msg.txt. The hash is the last field of that comment's '256-sha' line.
      - name: Retreive hash of approved diff
        if: ${{ env.earlier_diff_was_approved == 'true' }}
        id: approved_diff
        run: |
          msg_start=$(head -c 50 .github/fuzzy-ci-helpers/msg.txt)
          next_page_endpoint="$GH_API_COMMENTS?per_page=100&page=1"
          latest_comment="{}"
          while [ -n "$next_page_endpoint" ]; do
            # The best match found so far is fed back in as $latest, so that a
            # later page without any matching comment cannot discard it.
            latest_comment=$(
              curl -s -D "headers.txt" -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" "$next_page_endpoint" |
                jq --arg msg_start "$msg_start" --argjson latest "$latest_comment" '
                  map(
                    select(
                      (.body | startswith($msg_start)) and .user.login == "github-actions[bot]"
                    )
                  ) + [$latest] | max_by(.created_at)'
            )
            # Pull the rel="next" URL out of the `link:` response header; the
            # result is empty on the last page, which ends the loop.
            next_page_endpoint=$(
              grep '^link:' headers.txt |
                tr ',' '\n' |
                grep 'rel="next"' |
                cut -d'<' -f2 |
                cut -d'>' -f1
            )
          done
          hash=$(echo "$latest_comment" | jq '.body' -r | grep '256-sha' | awk '{print $NF}')
          echo "hash='$hash'" | tee -a $GITHUB_OUTPUT
      # The single quotes are part of the output value (here and in the step
      # above), which keeps the `[ ... == ... ]` test in "Return" well-formed
      # even when a hash is empty.
      - name: Analyze current diff
        id: current_diff
        run: |
          hash=$(sha256sum "$FULL_DIFF_FILE" | awk '{print $1}')
          echo "hash='$hash'" | tee -a $GITHUB_OUTPUT
      - name: Write instruction to delete PR label
        # When this workflow is triggered by a PR from a fork, it doesn't have
        # the permissions to delete PR labels. Instead, we forward the
        # instruction to delete the label to fuzzy-ci-privileged.yml.
        if: ${{ env.earlier_diff_was_approved == 'true' && steps.approved_diff.outputs.hash != steps.current_diff.outputs.hash }}
        run: |
          echo ${{ steps.approved_diff.outputs.hash }}
          echo ${{ steps.current_diff.outputs.hash }}
          mkdir -p ./forward
          jq -n \
            --arg instruction "delete_label" \
            --arg endpoint "$GH_API_LABELS" \
            '{instruction: $instruction, endpoint: $endpoint}' > ./forward/instruction.json
      - name: Upload instruction to delete label
        if: ${{ env.earlier_diff_was_approved == 'true' && steps.approved_diff.outputs.hash != steps.current_diff.outputs.hash }}
        uses: actions/upload-artifact@v3
        with:
          name: forwarded_instructions
          path: forward/
      # Final verdict. Exit status 0 only if there is no diff or the current
      # diff is the approved one.
      # The continuation lines of the multi-line printf are deliberately not
      # indented: inside the double-quoted string, leading whitespace would
      # become part of the printed message.
      - name: Return
        id: return
        env:
          github_api_labels_url: ${{ github.event.pull_request.base.repo.url }}/issues/${{ github.event.pull_request.number }}/labels
        run: |
          print_head_of_diffs () {
            echo "--------beginning of full responses diff head--------"
            head -n 100 "$FULL_DIFF_FILE"
            echo "--------end of full responses diff head--------"
            echo "--------beginning of distilled data diff head--------"
            head -n 100 "$DISTILLED_DIFF_FILE"
            echo "--------end of distilled data diff head--------"
          }
          # FIXME (?): Are nested conditionals always so ugly in Bash, or is there a better way? Option types and the possibility to match would help a lot.
          LABEL_NAME=$(cat .github/fuzzy-ci-helpers/label_name.txt)
          if $earlier_diff_was_approved; then
            echo "Earlier diff was approved."
            if [ ${{ steps.current_diff.outputs.hash }} == ${{ steps.approved_diff.outputs.hash }} ]; then
              echo "This diff has been approved earlier. Everything ok."
              exit 0
            else
              print_head_of_diffs
              printf "The diff has changed since it was approved. So I'm removing the $LABEL_NAME label. If the new diff looks good, please set the label again.\n\
          There's a head of the new diffs printed above. The whole diffs can be downloaded from $CURRENT_ACTION_URL .\n\
          Previous sha256: ${{ steps.approved_diff.outputs.hash }}\n\
          Current sha256: ${{ steps.current_diff.outputs.hash }}"
              echo "delete_label=true" >> $GITHUB_OUTPUT
              exit 1
            fi
          else
            if $current_diff_exists; then
              print_head_of_diffs
              printf "There's a head of the diffs printed above. The diffs can be downloaded from $CURRENT_ACTION_URL .\nIf it looks good, please set the $LABEL_NAME label on the PR."
              exit 1
            else
              echo "No diff. All good."
              exit 0
            fi
          fi

  # ---------------------------------------------------------------------------
  # approve: runs when the 'fuzzy-diff-looks-good' label is set. Looks up the
  # diff artifact for the PR's base/head commits, hashes the full-responses
  # diff and forwards an instruction to comment that hash on the PR. If no
  # such artifact exists, forwards an instruction to delete the label and
  # fails.
  # ---------------------------------------------------------------------------
  approve:
    name: Approve diff
    if: >
      github.event_name == 'pull_request' &&
      github.event.action == 'labeled' &&
      github.event.label.name == 'fuzzy-diff-looks-good'
    runs-on: ubuntu-22.04
    steps:
      - name: Retreive diff artifact meta-data
        id: diff_metadata
        run: |
          # Ask the API for artifacts with exactly this name instead of relying
          # on the wanted artifact being on the first page of the repository's
          # artifact list. The jq `select` is kept as a second check.
          all_artifacts=$(
            curl -sSL -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
              "$GH_API_ARTIFACTS?per_page=100&name=$DIFF_ARTIFACT_NAME"
          )
          diff_artifact=$(echo "$all_artifacts" | jq "first(.artifacts[] | select(.name == \"$DIFF_ARTIFACT_NAME\") )")
          id=$(echo "$diff_artifact" | jq ".id")
          echo "id=$id" | tee -a $GITHUB_OUTPUT
          workflow_run=$(echo "$diff_artifact" | jq ".workflow_run | .id")
          echo "workflow_run=$workflow_run" | tee -a $GITHUB_OUTPUT
          # jq prints nothing when no artifact matched; `null` is treated as
          # "missing" as well.
          if [ -z "$id" ] || [ "$id" = "null" ]; then
            echo "exists=false" | tee -a $GITHUB_OUTPUT
          else
            echo "exists=true" | tee -a $GITHUB_OUTPUT
          fi
      - name: Write instruction to delete PR label
        # When this workflow is triggered by a PR from a fork, it doesn't have
        # the permissions to delete PR labels. Instead, we forward the
        # instruction to delete the label to fuzzy-ci-privileged.yml.
        if: ${{ steps.diff_metadata.outputs.exists == 'false' }}
        run: |
          mkdir -p ./forward
          jq -n \
            --arg instruction "delete_label" \
            --arg endpoint "$GH_API_LABELS" \
            '{instruction: $instruction, endpoint: $endpoint}' > ./forward/instruction.json
      - name: Upload instruction to delete label
        if: ${{ steps.diff_metadata.outputs.exists == 'false' }}
        uses: actions/upload-artifact@v3
        with:
          name: forwarded_instructions
          path: forward/
      # Exits non-zero, so none of the steps below run in this case.
      - name: Fail due to diff not existing yet
        if: ${{ steps.diff_metadata.outputs.exists == 'false' }}
        run: |
          printf "You seem to have tried to approve a diff that doesn't exist yet.\nWait for the diff to have been generated and then try again."
          exit 1
      # `curl -O` names the download after the last URL segment, i.e. `zip`.
      - name: Download diff
        env:
          id: ${{ steps.diff_metadata.outputs.id }}
        run: |
          # Doing this manually, since actions/download-artifact only works on the same workflow run on which the artifact was uploaded
          curl -sSLO -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" "$GH_API_ARTIFACTS/$id/zip" -D headers.txt
      - name: Unzip downloaded diff
        run: |
          unzip zip || (echo "Download of diff artifact failed" && cat headers.txt && cat zip && exit 1)
      - name: Compute full responses diff hash
        id: diff_hash
        run: |
          hash=$(sha256sum "$FULL_DIFF_FILE" | awk '{print $1}')
          echo "hash=$hash" | tee -a $GITHUB_OUTPUT
      - name: Write instruction to comment on PR
        # When this workflow is triggered by a PR from a fork, it doesn't have
        # the permissions to comment on PRs. Instead, we forward the
        # instruction to comment on the PR to fuzzy-ci-privileged.yml.
        env:
          approved_diffs_workflow_run: ${{ steps.diff_metadata.outputs.workflow_run }}
          approved_diffs_hash: ${{ steps.diff_hash.outputs.hash }}
        run: |
          mkdir -p ./forward
          jq -n \
            --arg instruction "comment" \
            --arg endpoint "$GH_API_COMMENTS" \
            --arg artifacts_url "$ACTIONS_RUNS_ENDPOINT/$approved_diffs_workflow_run" \
            --arg hash "$approved_diffs_hash" \
            '{instruction: $instruction, endpoint: $endpoint, artifacts_url: $artifacts_url, hash: $hash}' > ./forward/instruction.json
      - name: Upload instruction to comment on PR
        uses: actions/upload-artifact@v3
        with:
          name: forwarded_instructions
          path: forward/