-
Notifications
You must be signed in to change notification settings - Fork 3
/
robots.txt
116 lines (106 loc) · 2.63 KB
/
robots.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# robots.txt generator: http://tool.chinaz.com/robots/
# Block generic HTTP clients and scraping libraries.
# Scrapy|Curl|HttpClient|PostmanRuntime
# Notes: Python requests/urllib, Toutiao's Bytespider, node, Java HttpClient, C# crawlers (WinHttp, HtmlAgilityPack), PHP: phpspider
# python-requests|Python-urllib|Bytespider|node-superagent|Java|HttpClient|WinHttp|HtmlAgilityPack|phpspider|^$

# Every agent in this group gets the same rule, so each one is named once
# and the group carries a single Disallow.
User-agent: Scrapy
User-agent: curl
User-agent: HttpClient
User-agent: PostmanRuntime
User-agent: python-requests
User-agent: Python-urllib
User-agent: Bytespider
User-agent: node-superagent
User-agent: Java
User-agent: WinHttp
User-agent: HtmlAgilityPack
User-agent: phpspider
Disallow: /
# Unwanted crawlers that have been observed on the site (https://www.yansheng.fun/).
# Barkrowler|SeznamBot|Go-http-client|Nimbostratus-Bot|MauiBot|serpstatbot
# BLEXBot|DotBot|AhrefsBot|HTTP Banner Detection|Apache-HttpClient|Wappalyzer

# One group: all of the agents named here are denied the whole site.
User-agent: Barkrowler
User-agent: SeznamBot
User-agent: Go-http-client
User-agent: Nimbostratus-Bot
User-agent: MauiBot
User-agent: serpstatbot
User-agent: BLEXBot
User-agent: DotBot
User-agent: AhrefsBot
User-agent: HTTP Banner Detection
User-agent: Apache-HttpClient
User-agent: Wappalyzer
User-agent: Nmap Scripting Engine
User-agent: centuryb
User-agent: NetcraftSurveyAgent
User-agent: Adsbot
User-agent: ltx71
User-agent: CensysInspect
User-agent: Dataprovider.com
Disallow: /
# Default policy for every crawler that is not matched by a more specific group.

User-agent: *
# Interval between requests, in seconds (non-standard directive; not all crawlers honour it).
Crawl-delay: 10
# Paths that must not be crawled.
# These come before the Allow rules so that parsers which apply the first
# matching rule reach them before the catch-all "Allow: /"; parsers that use
# the longest match resolve the same way regardless of order.
Disallow: /assets/
Disallow: /fonts/
Disallow: /img/
Disallow: /js/
Disallow: /css/
Disallow: /music/lrc/
# "$" anchors the pattern at the end of the URL so ".js" no longer also matches
# ".json"; the "?" variants keep covering script/stylesheet URLs that carry a
# query string.
Disallow: /*.js$
Disallow: /*.js?
Disallow: /*.css$
Disallow: /*.css?
Disallow: /content.json
# Paths that may be crawled.
Allow: /archives/
Allow: /article/
Allow: /categories/
Allow: /page/
Allow: /photos/
Allow: /tags/
Allow: /links/
Allow: /message-wall/
Allow: /music/index.html
# Catch-all goes last: anything not disallowed above is crawlable.
Allow: /
# Sitemap locations
Sitemap: http://www.yansheng.fun/sitemap.xml
Sitemap: http://www.yansheng.fun/baidusitemap.xml
Sitemap: https://www.yansheng.fun/sitemap.xml
Sitemap: https://www.yansheng.fun/baidusitemap.xml
Sitemap: https://yansheng836.github.io/sitemap.xml
Sitemap: https://yansheng836.github.io/baidusitemap.xml
Sitemap: https://yansheng0083.gitee.io/sitemap.xml
Sitemap: https://yansheng0083.gitee.io/baidusitemap.xml