.htaccess 有条件地禁止客户端以 HTTP/1.0 / HTTP/1.1 访问
以下内容的大概意思是:当来访者的 IP 不是以 2408 或 240E 开头,又不属于你允许的爬虫时,就只允许其访问 robots.txt 文件,其他一切请求均被拒绝,返回 403 错误码。
RewriteEngine on
# --- Conditionally block HTTP/1.0 / HTTP/1.1 clients: begin ---
# Skip blocking for visitors whose (IPv6) address starts with 2408 or 240e
# (NC added so the hex prefix matches regardless of letter case).
RewriteCond %{REMOTE_ADDR} !^2408: [NC]
RewriteCond %{REMOTE_ADDR} !^240e: [NC]
# Skip blocking for crawlers you allow (substring match on the User-Agent,
# case-insensitive), e.g. Google and similar bots.
RewriteCond %{HTTP_USER_AGENT} !Google [NC]
RewriteCond %{HTTP_USER_AGENT} !Baidu [NC]
RewriteCond %{HTTP_USER_AGENT} !Yandex [NC]
RewriteCond %{HTTP_USER_AGENT} !sougou [NC]
RewriteCond %{HTTP_USER_AGENT} !Bing [NC]
RewriteCond %{HTTP_USER_AGENT} !Bytespider [NC]
RewriteCond %{HTTP_USER_AGENT} !Applebot [NC]
# Match only requests made over HTTP/1.0 or HTTP/1.1. THE_REQUEST is the raw
# request line (e.g. "GET /index.html HTTP/1.1"), which ends with the protocol,
# so the $ anchor pins the version check.
RewriteCond %{THE_REQUEST} HTTP/1\.0$ [OR]
RewriteCond %{THE_REQUEST} HTTP/1\.1$
# Always allow /robots.txt. The dot is escaped and the pattern is end-anchored
# so only the exact path is exempt (the original unescaped, unanchored
# "^/robots.txt" also exempted paths such as /robotsXtxt or /robots.txt.bak).
RewriteCond %{REQUEST_URI} !^/robots\.txt$
# Everything else from such clients is forbidden: respond 403 and stop.
RewriteRule ^/?(.*) - [F,L]
# --- Conditionally block HTTP/1.0 / HTTP/1.1 clients: end ---