/
robots.txt
139 lines (125 loc) · 2.66 KB
/
robots.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Bloqueo basico para todos los bots y crawlers
# puede dar problemas por bloqueo de recursos en GWT
User-agent: *
Allow: /wp-content/uploads/*
Allow: /wp-content/*.js
Allow: /wp-content/*.css
Allow: /wp-includes/*.js
Allow: /wp-includes/*.css
Disallow: /cgi-bin
Disallow: /wp-content/plugins/
Disallow: /wp-content/themes/
Disallow: /wp-includes/
Disallow: /*/attachment/
Disallow: /tag/*/page/
Disallow: /tag/*/feed/
Disallow: /page/
Disallow: /comments/
Disallow: /xmlrpc.php
Disallow: /?attachment_id*
# Bloqueo de las URL dinamicas
Disallow: /*?
#Bloqueo de busquedas
User-agent: *
Disallow: /?s=
Disallow: /search
# Bloqueo de trackbacks
User-agent: *
Disallow: /trackback
Disallow: /*trackback
Disallow: /*trackback*
Disallow: /*/trackback
# Bloqueo de feeds para crawlers
User-agent: *
Allow: /feed/$
Disallow: /feed/
Disallow: /comments/feed/
Disallow: /*/feed/$
Disallow: /*/feed/rss/$
Disallow: /*/trackback/$
Disallow: /*/*/feed/$
Disallow: /*/*/feed/rss/$
Disallow: /*/*/trackback/$
Disallow: /*/*/*/feed/$
Disallow: /*/*/*/feed/rss/$
Disallow: /*/*/*/trackback/$
# Ralentizamos algunos bots que se suelen volver locos
User-agent: noxtrumbot
Crawl-delay: 20
User-agent: msnbot
Crawl-delay: 20
User-agent: Slurp
Crawl-delay: 20
# Bloqueo de bots y crawlers poco utiles
User-agent: MSIECrawler
Disallow: /
User-agent: WebCopier
Disallow: /
User-agent: HTTrack
Disallow: /
User-agent: Microsoft.URL.Control
Disallow: /
User-agent: libwww
Disallow: /
User-agent: Orthogaffe
Disallow: /
User-agent: UbiCrawler
Disallow: /
User-agent: DOC
Disallow: /
User-agent: Zao
Disallow: /
User-agent: sitecheck.internetseer.com
Disallow: /
User-agent: Zealbot
Disallow: /
User-agent: MSIECrawler
Disallow: /
User-agent: SiteSnagger
Disallow: /
User-agent: WebStripper
Disallow: /
User-agent: WebCopier
Disallow: /
User-agent: Fetch
Disallow: /
User-agent: Offline Explorer
Disallow: /
User-agent: Teleport
Disallow: /
User-agent: TeleportPro
Disallow: /
User-agent: WebZIP
Disallow: /
User-agent: linko
Disallow: /
User-agent: HTTrack
Disallow: /
User-agent: Microsoft.URL.Control
Disallow: /
User-agent: Xenu
Disallow: /
User-agent: larbin
Disallow: /
User-agent: libwww
Disallow: /
User-agent: ZyBORG
Disallow: /
User-agent: Download Ninja
Disallow: /
User-agent: wget
Disallow: /
User-agent: grub-client
Disallow: /
User-agent: k2spider
Disallow: /
User-agent: NPBot
Disallow: /
User-agent: WebReaper
Disallow: /
# Previene problemas de recursos bloqueados en Google Webmaster Tools
User-Agent: Googlebot
Allow: /*.css$
Allow: /*.js$
Sitemap: http://www.cancarner.cat/sitemap_index.xml
Sitemap:http://www.cancarner.cat/page-sitemap.xml