php测试链接是否为404页面
用帝国后台生成了网站的 sitemap,但不知道其中是否有死链接,于是写了一个 PHP 脚本来测试这些链接的情况。
<?php
/**
 * Read a text file and return its non-blank lines, trimmed.
 *
 * Each line of the file is expected to hold one URL. Leading and trailing
 * whitespace (including "\r" from Windows line endings) is stripped, and
 * lines that are empty after trimming are dropped so callers never receive
 * an empty string as a URL.
 *
 * Terminates the script with a message if the file cannot be read.
 *
 * @param string $fileName Path of the text file to read.
 * @return array List of trimmed, non-empty lines in file order.
 */
function getUrls($fileName){
    // file() reads the whole file into a list of lines and returns false on failure.
    $rawLines = file($fileName, FILE_IGNORE_NEW_LINES);
    if ($rawLines === false) {
        die("Unable to open file ".$fileName);
    }

    $urls = array();
    foreach ($rawLines as $rawLine) {
        $line = trim($rawLine);
        // Skip blank lines (e.g. a trailing newline at the end of the file).
        if ($line !== '') {
            $urls[] = $line;
        }
    }
    return $urls;
}
// Check every URL listed in urls.txt and print whether it ends in a 404 page.
$urls=getUrls('urls.txt');
foreach ($urls as $url) {
    // A blank entry is not a URL; do not issue a request for it.
    if ($url === '') {
        continue;
    }

    $headers = get_headers($url);
    // get_headers() returns false when the request fails; such a link must
    // not be reported as reachable.
    if ($headers === false) {
        echo $url.'-------->不能正常访问'.PHP_EOL;
        continue;
    }

    // The header list can contain several status lines when redirects were
    // followed; keep the last one so a redirect that ends in a 404 is caught.
    $statusCode = 0;
    foreach ($headers as $header) {
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', $header, $match)) {
            $statusCode = (int) $match[1];
        }
    }

    if ($statusCode === 404) {
        echo $url."-------->404".PHP_EOL;
    } else {
        echo $url.'-------->正常访问'.PHP_EOL;
    }
}
?>
测试结果如下
下面的脚本测试页面是否能打开,输出每个链接的检测结果,并将可正常访问(状态码为 200/301/302)的链接追加写入 200urls.txt。
<?php
// Remove the script execution time limit: URLs are checked one after another
// and each request may wait up to its own cURL timeout.
set_time_limit(0);
/**
 * Read a text file and return its non-blank lines, trimmed.
 *
 * Each line of the file is expected to hold one URL. Leading and trailing
 * whitespace (including "\r" from Windows line endings) is stripped, and
 * lines that are empty after trimming are dropped so callers never receive
 * an empty string as a URL.
 *
 * Terminates the script with a message if the file cannot be read.
 *
 * @param string $fileName Path of the text file to read.
 * @return array List of trimmed, non-empty lines in file order.
 */
function getUrls($fileName){
    // file() reads the whole file into a list of lines and returns false on failure.
    $rawLines = file($fileName, FILE_IGNORE_NEW_LINES);
    if ($rawLines === false) {
        die("Unable to open file ".$fileName);
    }

    $urls = array();
    foreach ($rawLines as $rawLine) {
        $line = trim($rawLine);
        // Skip blank lines (e.g. a trailing newline at the end of the file).
        if ($line !== '') {
            $urls[] = $line;
        }
    }
    return $urls;
}
/**
 * Request a URL with cURL and return the HTTP status code it reports.
 *
 * The response body is captured rather than printed and then discarded.
 * The transfer is limited to 30 seconds, and TLS peer/host verification is
 * switched off. CURLOPT_FOLLOWLOCATION is not set here, so the code returned
 * is the one cURL reports for the requested URL itself.
 *
 * @param string $url URL to request.
 * @return mixed Value of curl_getinfo() for CURLINFO_HTTP_CODE.
 */
function getStatus($url){
    $handle = curl_init();

    // Transfer options, applied one by one in this order.
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_TIMEOUT        => 30,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_SSL_VERIFYPEER => FALSE,
        CURLOPT_SSL_VERIFYHOST => FALSE,
    );
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    // Only the status code is of interest; the returned body is ignored.
    curl_exec($handle);
    $statusCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    return $statusCode;
}
// Check every URL listed in urls.txt, print the result for each one, and
// append the URLs that answered 200, 301 or 302 to 200urls.txt.
$urls=getUrls('urls.txt');
// Stop early if the result file cannot be opened, instead of passing false
// to fwrite()/fclose() later.
$fp=fopen("200urls.txt", "a+") or die("Unable to open file 200urls.txt");
// Status codes treated as "reachable".
$okCodes = array(200, 301, 302);
foreach ($urls as $url) {
    // A blank entry is not a URL; do not issue a request for it.
    if ($url === '') {
        continue;
    }

    $curl_code = (int) getStatus($url);
    if (in_array($curl_code, $okCodes, true)) {
        echo $url."-------->{$curl_code}".PHP_EOL."</br>";
        fwrite($fp, $url.PHP_EOL);
    } else {
        echo $url.'-------->不能正常访问'.PHP_EOL."</br>";
    }
}
fclose($fp);
?>
测试结果如下
http://www.experienceproject.com-------->200
http://www.bcz.com-------->301
http://www.wordpress.com/-------->301
http://www.blogger.com-------->不能正常访问
https://www.facebook.com/-------->302
http://www.linkedin.com-------->301
http://www.tumblr.com/dashboard-------->不能正常访问
https://twitter.com/-------->200
https://myspace.com/home-------->302
http://www.arto.com-------->200
http://www.pysznosci.org-------->不能正常访问
http://www.skyrock.com-------->不能正常访问
http://www.livejournal.com-------->200
http://www.createblog.com-------->200
http://over-blog.com-------->不能正常访问
http://www.folkd.com-------->200
http://iblog.co.za-------->不能正常访问
http://friendsite.com-------->301
http://www.onsugar.com/-------->200