-
Notifications
You must be signed in to change notification settings - Fork 304
241 lines (230 loc) · 9.34 KB
/
test-hyperlinks.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# Workflow that collects hyperlinks from XML, Markdown and reStructuredText
# files and reports the ones that cannot be resolved.
name: Test Hyperlinks

on:
  # Pull requests against master and candidate branches; the negated
  # patterns below exclude older candidate lines from the match.
  pull_request:
    branches:
      - 'master'
      - 'candidate-*'
      - '!candidate-9.4.*'
      - '!candidate-9.2.*'
      - '!candidate-9.0.*'
      - '!candidate-8.*'
      - '!candidate-7.*'
      - '!candidate-6.*'
  # Manual trigger with an optional debug switch.
  workflow_dispatch:
    inputs:
      Debug-Mode:
        description: Run in Debug mode to upload all created files
        type: boolean
        required: false
        default: false

jobs:
  main:
    runs-on: ubuntu-22.04
    steps:
# Two commits are fetched so that HEAD^1 exists when the file-listing step
# diffs HEAD against its first parent.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    fetch-depth: 2
# Writes xmlFilesList.txt, mdFilesList.txt and rstFilesList.txt: one path per
# line for every documentation file the later steps should scan.
- name: List Documentation files
  # A failure while listing must not fail the job.
  continue-on-error: true
  run: |
    if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
      # Manual run: every documentation file under the workspace (absolute paths).
      find "$PWD" -name '*.xml' -type f > xmlFilesList.txt
      find "$PWD" -name '*.md' -type f > mdFilesList.txt
      find "$PWD" -name '*.rst' -type f > rstFilesList.txt
    else
      # Otherwise: only the files changed between HEAD^1 and HEAD (repo-relative paths).
      git diff --name-only HEAD^1 HEAD > updatedFiles.txt
      # Match on the file extension only. An unanchored ".md" would also select
      # paths that merely contain those characters (for example "cmdline.cpp").
      # grep exits 1 when nothing matches; "|| true" keeps the runner's default
      # "bash -e" shell from aborting before the remaining lists are written.
      grep -E '\.xml$' updatedFiles.txt > xmlFilesList.txt || true
      grep -E '\.md$' updatedFiles.txt > mdFilesList.txt || true
      grep -E '\.rst$' updatedFiles.txt > rstFilesList.txt || true
    fi
# Extracts candidate links from every listed documentation file into
# linksList.txt (entries look like <file>:<line>:<link>), then splits them into
# externalLinks.txt (http/https) and internalLinks.txt (everything else).
# Listed files that do not exist are recorded in missingFiles.txt.
- name: List links from Documentation files
  run: |
    # Split command substitutions on newlines only so that one list entry is
    # one loop item even when it contains spaces.
    IFS=$'\n'

    # XML: keep url="http..." attributes found between <ulink and </ulink>.
    for FILE in $( cat xmlFilesList.txt )
    do
      #check if the file is missing
      if [[ ! -f "$FILE" ]]; then
        echo -e "$FILE -\e[31m file missing\e[0m"
        echo "$FILE" >> missingFiles.txt
        continue
      fi
      # grep -onH prints <file>:<line>:<match>; sed drops the url=" prefix.
      grep -onHE -e "<ulink" -e 'url="http[^\"\]+' -e "</ulink>" "${FILE}" | sed 's/url="//' > links.tmp
      # FLAG is 1 while the scan is between an opening and a closing ulink tag.
      FLAG=0
      for LINE in $( cat links.tmp )
      do
        LINK=$( echo "$LINE" | cut -d ':' -f3- )
        if [[ ${LINK:0:6} == '<ulink' ]]; then
          FLAG=1
          continue
        elif [[ ${LINK:0:8} == '</ulink>' ]]; then
          FLAG=0
          continue
        fi
        if [[ $FLAG -eq 1 ]]; then
          echo "$LINE" >> linksList.txt
        fi
      done
    done

    # Markdown: keep "](target" links and bare http(s) URLs outside ``` fences.
    for FILE in $( cat mdFilesList.txt )
    do
      #check if the file is missing
      if [[ ! -f "$FILE" ]]; then
        echo -e "$FILE -\e[31m file missing\e[0m"
        echo "$FILE" >> missingFiles.txt
        continue
      fi
      # In the URL patterns "]" is listed first inside the bracket expression
      # so that it is a literal member of the set. Written as "\]" it would
      # close the set early and the pattern would only match a URL followed by
      # the literal text [,`)] - i.e. practically never.
      grep -onHE -e "\]\([^\)]+" -e "\`\`\`[^\`]*" -e "http://[^][ ;\"'<>,\`)\\]+" -e "https://[^][ ;\"'<>,\`)\\]+" "${FILE}" | sed 's/](//' > links.tmp
      # FLAG toggles on every ``` match; links are kept only while it is 0.
      FLAG=0
      for LINE in $( cat links.tmp )
      do
        LINK=$( echo "$LINE" | cut -d ':' -f3- )
        if [[ ${LINK:0:3} == '```' ]]; then
          FLAG=$(( 1 - FLAG ))
          continue
        fi
        if [[ $FLAG -eq 0 ]]; then
          echo "$LINE" >> linksList.txt
        fi
      done
    done

    # reStructuredText: keep ".. _name: target" lines and bare http(s) URLs.
    for FILE in $( cat rstFilesList.txt )
    do
      #check if the file is missing
      if [[ ! -f "$FILE" ]]; then
        echo -e "$FILE -\e[31m file missing\e[0m"
        echo "$FILE" >> missingFiles.txt
        continue
      fi
      # sed strips the ".. _name: " prefix so only the target remains.
      grep -onHE -e ".. _[^\]+" -e "http://[^\ \;\"\'\<\>\,\`\)]+" -e "https://[^\ \;\"\'\<\>\,\`\)]+" "${FILE}" | sed 's/.. _[^\:]*: //' >> linksList.txt
    done

    if [[ -f linksList.txt ]]; then
      # Drop local addresses and entries containing "$" or "[", then split by
      # scheme. "|| true": an empty result is not an error for this step.
      grep -vE '127.0.0.1|localhost|\$|\[' linksList.txt | grep -E 'https://|http://' > externalLinks.txt || true
      grep -vE '127.0.0.1|localhost|\$|\[' linksList.txt | grep -vE 'https://|http://' > internalLinks.txt || true
    fi
# Requests the headers of every external link (up to three attempts each) and
# appends links answering 404, or not answering at all, to error-report.log.
# Status codes are remembered per URL so that each URL is fetched only once;
# checkedLinksCache.txt keeps a "<url>~<status>" record of every fetch.
- name: Test External links
  run: |
    touch checkedLinksCache.txt
    IFS=$'\n'
    # Exact-match lookup table keyed by URL. Looking a URL up in the cache file
    # with grep/cut treated it as a regular expression, matched substrings of
    # other entries, and returned the wrong field for URLs containing "~".
    declare -A CHECKED_LINKS
    if [[ -f externalLinks.txt ]]; then
      for LINE in $( cat externalLinks.txt )
      do
        # Entries look like <file>:<line>:<url>.
        LINK=$( echo "$LINE" | cut -d ':' -f3- )
        LINK=${LINK%.} #remove trailing dot(.)
        LINK=${LINK% } #remove trailing space
        if [[ -z $LINK ]]; then
          # Nothing to fetch, and an empty array subscript is not allowed.
          echo "${LINE} - skipped (no URL)"
          continue
        fi
        TRY=3 #Max attempts to check status code of hyperlinks
        if [[ -z ${CHECKED_LINKS["$LINK"]+cached} ]]; then
          STATUS_CODE=""
          while [[ $TRY -ne 0 ]]
          do
            # Follow redirects and take the status of the last response.
            STATUS_CODE=$( curl -LI -m 60 -s "$LINK" | grep "HTTP/" | tail -1 | cut -d' ' -f2 )
            if [[ -n $STATUS_CODE ]]; then
              CHECKED_LINKS["$LINK"]=$STATUS_CODE
              echo "$LINK~$STATUS_CODE" >> checkedLinksCache.txt
              break
            else
              echo "$LINE"
              echo "retrying..."
              TRY=$(( TRY - 1 ))
            fi
          done
        else
          STATUS_CODE=${CHECKED_LINKS["$LINK"]}
        fi
        if [[ $STATUS_CODE -eq 404 ]]; then
          echo -e "${LINK} - \e[31m404 Error\e[0m"
          echo "${LINE}" >> error-report.log
        elif [[ -z $STATUS_CODE ]]; then
          echo -e "${LINK} - \e[31mNo Response\e[0m"
          echo "${LINE}(No-Response)" >> error-report.log
        else
          echo "${LINK} - ${STATUS_CODE}"
        fi
      done
    fi
# Validates non-http references: "#anchor" references must have a matching
# heading in the same file, every other reference must resolve to an existing
# file. Failures are appended to error-report.log.
- name: Test Internal Links
  run: |
    # Every step runs in its own shell, so the newline-only IFS has to be set
    # here as well; otherwise entries containing spaces are split apart.
    IFS=$'\n'
    if [[ -f internalLinks.txt ]]; then
      for LINE in $( cat internalLinks.txt )
      do
        # Entries look like <file>:<line>:<reference>.
        REFERENCE=$( echo "$LINE" | cut -d ':' -f3- )
        FILE=$( echo "$LINE" | cut -d ':' -f1 )
        if [[ ${REFERENCE:0:1} == '#' ]]; then
          # Take the text of the link that uses this anchor and look for a
          # heading ("# <text>") with the same text in the same file.
          LINK_TEXT=$( cat "$FILE" | grep -oE "\[.*\]\(${REFERENCE}\)" | sed 's/\[//' | cut -d ']' -f1 )
          IS_PRESENT=$( cat "$FILE" | grep -oE "# ${LINK_TEXT}" | wc -w )
          if [[ $IS_PRESENT -eq 0 ]]; then
            echo -e "${LINE} -\e[31m invalid reference\e[0m"
            echo "${LINE}" >> error-report.log
          fi
        else
          if [[ ${REFERENCE:0:1} == '/' ]]; then
            # A leading "/" is resolved against the working directory.
            BASE_DIR=$PWD
          else
            # dirname yields "." for a file in the working directory; stripping
            # the basename left an empty string there, which turned the
            # reference into a path under the filesystem root.
            BASE_DIR=$( dirname "$FILE" )
          fi
          # -m: canonicalise even when path components are missing. Plain
          # realpath fails in that case and would abort the "bash -e" script
          # before the broken reference is recorded.
          SEARCH_FILE=$( realpath -m "$BASE_DIR/${REFERENCE}" )
          if [[ ! -f $SEARCH_FILE ]]; then
            echo -e "${LINE} -\e[31m invalid reference\e[0m"
            # Quote the pattern so the reference is replaced literally.
            echo "${LINE/"$REFERENCE"/$SEARCH_FILE}" >> error-report.log
          fi
        fi
      done
    fi
# Prints a summary of the scan and fails the job when error-report.log holds
# at least one entry.
- name: Report Error links
  run: |
    # Number of logged entries; stays 0 when no report was written.
    BROKEN_ENTRIES=0
    if [[ -f error-report.log ]]; then
      BROKEN_ENTRIES=$( wc -l < error-report.log )
    fi

    SCANNED_FILES=$( cat *FilesList.txt | wc -l )
    echo -e "\e[32mNo. of files scanned : ${SCANNED_FILES}\e[0m"

    if [[ $BROKEN_ENTRIES -eq 0 ]]; then
      echo -e "\e[32mNo Broken-links found\e[0m"
    else
      # The link is everything after <file>:<line>: in each entry.
      UNIQUE_LINKS=$( cut -d: -f3- error-report.log | sort -u | wc -l )
      TOTAL_REFERENCES=$( cut -d: -f3- error-report.log | sort | wc -l )
      echo -e "\e[31mNo. of unique broken links : ${UNIQUE_LINKS}\e[0m"
      echo -e "\e[31mTotal No. of reference to broken links : ${TOTAL_REFERENCES}\e[0m"
      exit -1
    fi
# Rewrites error-report.log into a readable report grouped by file (preceded by
# the list of missing files, if any) and removes the intermediate lists unless
# a manual run asked for Debug mode.
- name: Modify log file
  if: ${{ failure() || cancelled() }}
  run: |
    # Split command substitutions on newlines only: file names and links that
    # contain spaces must stay single loop items.
    IFS=$'\n'

    # This step also runs when the job failed or was cancelled before a report
    # existed; in that case there is nothing to reformat.
    if [[ -f error-report.log ]]; then
      # Remove the parent directory of the working directory from every path.
      BASE_DIR="$( dirname "$PWD" )/"
      sed -i "s|${BASE_DIR}||g" error-report.log

      : > error-reportTmp.log
      FILE_COUNT=1
      for LINE in $( cut -d ':' -f1 error-report.log | sort -u )
      do
        echo "$FILE_COUNT. $LINE" >> error-reportTmp.log
        FILE_COUNT=$(( FILE_COUNT + 1 ))
        # Select entries whose file field equals this name exactly. Matching the
        # name as an unquoted grep pattern also picked up entries of other files
        # whose path merely contains it.
        for LINK in $( FILE_NAME="$LINE" awk -F: '$1 == ENVIRON["FILE_NAME"]' error-report.log | cut -d ':' -f2- )
        do
          echo -e "\t Line $LINK" | sed 's/:/ : /' >> error-reportTmp.log
        done
      done

      if [[ -s missingFiles.txt ]]; then
        echo -e "Missing Files: \n" > error-report.log
        cat missingFiles.txt >> error-report.log
        echo -e "Broken links: \n" >> error-report.log
      else
        echo -e "Broken links: \n" > error-report.log
      fi
      cat error-reportTmp.log >> error-report.log
    fi

    # Both sides are quoted because the Debug-Mode input expands to nothing on
    # pull_request events, which would otherwise be a syntax error in [[ ]].
    if [[ "${{ github.event_name }}" == "pull_request" || "${{ inputs.Debug-Mode }}" == "false" ]]; then
      # No line continuation after the last operand: a trailing backslash there
      # joins "fi" onto the rm command and leaves the if unterminated.
      rm -rf *FilesList.txt \
        checkedLinksCache.txt \
        *Links.txt \
        linksList.txt
    fi
# Publishes the report, plus any intermediate lists that still exist, as a
# build artifact when the job failed or was cancelled.
- name: Upload logs
  uses: actions/upload-artifact@v4
  if: ${{ failure() || cancelled() }}
  with:
    name: Hyperlinks-testing-log
    # Paths are built from the workspace context instead of a hard-coded
    # /home/runner/work/<repository>/<repository> prefix, so the step does not
    # depend on the repository name.
    path: |
      ${{ github.workspace }}/error-report.log
      ${{ github.workspace }}/*FilesList.txt
      ${{ github.workspace }}/checkedLinksCache.txt
      ${{ github.workspace }}/*Links.txt
      ${{ github.workspace }}/linksList.txt
    if-no-files-found: ignore