用 PHP 编写的静态页面蜘蛛爬行记录代码
时间:2021-07-01 10:21:17
帮助过:8人阅读
<?php
/**
 * Spider visit logger and front controller for pre-generated static pages.
 *
 * The requested URL is read from $_SERVER['HTTP_X_REWRITE_URL'] (expected to
 * be set by the URL rewriter sitting in front of this script). The script:
 *   1. classifies the visitor by User-Agent and, when it looks like a search
 *      engine spider, appends one tab-separated line to bot.txt;
 *   2. serves the matching ".html" file from the current directory, or
 *      answers 404 when the URL is not an existing ".html" page.
 */

/**
 * Map a lower-cased User-Agent string to a spider label.
 *
 * Signatures are tested in order and the first match wins, so a more
 * specific needle must be listed before any needle it contains
 * ("msnbot" before "msn", every named spider before the generic "bot").
 * All needles are lower case because the caller lower-cases the User-Agent.
 *
 * @param string $useragent Lower-cased User-Agent header value.
 * @return string|null Spider label, or null when nothing matched.
 */
function bot_detect_spider($useragent)
{
    $signatures = array(
        'googlebot'            => 'Google',
        'mediapartners-google' => 'Google Adsense',
        'baiduspider'          => 'Baidu',
        'sogou spider'         => 'Sogou',
        'sogou web'            => 'Sogou web',
        'sosospider'           => 'SOSO',
        'yahoo'                => 'Yahoo',
        'msnbot'               => 'msnbot',
        'msn'                  => 'MSN',
        'sohu'                 => 'Sohu',
        'yodaobot'             => 'Yodao',
        'twiceler'             => 'Twiceler',
        'ia_archiver'          => 'Alexa_',
        'iaarchiver'           => 'Alexa',
        'slurp'                => '雅虎',
        'bot'                  => '其它蜘蛛',
    );

    foreach ($signatures as $needle => $label) {
        if (strpos($useragent, $needle) !== false) {
            return $label;
        }
    }

    return null;
}

/**
 * Resolve a request URL to a readable ".html" file below the current directory.
 *
 * @param string $requestUrl URL path as received from the rewriter.
 * @return string|null Absolute path of the page, or null when the URL does
 *                     not name an existing ".html" file inside the base directory.
 */
function bot_resolve_static_page($requestUrl)
{
    $relative = '.' . $requestUrl;

    // Only ".html" pages are served; a NUL byte can never be part of a valid path.
    if (substr($relative, -5) !== '.html' || strpos($relative, "\0") !== false) {
        return null;
    }

    $base = realpath('.');
    $page = realpath($relative);
    if ($base === false || $page === false || !is_file($page)) {
        return null;
    }

    // The URL is client-controlled: after realpath() has collapsed any "../"
    // segments, the result must still live under the base directory.
    $prefix = rtrim($base, '/\\') . DIRECTORY_SEPARATOR;
    $inside = (DIRECTORY_SEPARATOR === '\\')
        ? strncasecmp($page, $prefix, strlen($prefix)) === 0
        : strncmp($page, $prefix, strlen($prefix)) === 0;

    return $inside ? $page : null;
}

/**
 * Send the 404 response used for every request that cannot be served.
 *
 * @return void
 */
function bot_send_not_found()
{
    header('HTTP/1.1 404 Not Found');
    header("status: 404 Not Found");
    echo "该页面无法找到";
}

// --- 1. Log spider visits ---------------------------------------------------

$useragent  = isset($_SERVER['HTTP_USER_AGENT']) ? strtolower($_SERVER['HTTP_USER_AGENT']) : '';
$requestUrl = isset($_SERVER['HTTP_X_REWRITE_URL']) ? $_SERVER['HTTP_X_REWRITE_URL'] : '';

$bot = bot_detect_spider($useragent);
if ($bot !== null) {
    $remoteAddr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    $serverName = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';

    // Logging is best effort: a log file that cannot be opened must not break
    // page delivery, so the failure is ignored instead of writing to `false`.
    $fp = @fopen('bot.txt', 'a');
    if ($fp !== false) {
        fwrite($fp, date('Y-m-d H:i:s') . "\t" . $remoteAddr . "\t" . $bot . "\t" . 'http://' . $serverName . $requestUrl . "\r\n");
        fclose($fp);
    }
}

// --- 2. Serve the static page -----------------------------------------------

$file    = bot_resolve_static_page($requestUrl);
$content = ($file !== null) ? file_get_contents($file) : false;

if ($content !== false) {
    echo $content;
} else {
    bot_send_not_found();
}
?>
伪静态规则文件(httpd.ini)内容:
[ISAPI_Rewrite]

# 3600 = 1 hour
CacheClockRate 3600

RepeatLimit 32

# Protect httpd.ini and httpd.parse.errors files
# from accessing through HTTP
RewriteRule /httpd(?:\.ini|\.parse\.errors).* / [F,I,O]

# Serve the home page through index.php
RewriteRule /index.html /index.php

# Route /article/ and /list/ URLs to bot.php
RewriteRule ^/article/(.*) /bot.php [L]
RewriteRule ^/list/(.*) /bot.php [L]
|