时间:2021-07-01 10:21:17 帮助过:7人阅读
// Sequential crawler: requests xiami.com song pages by ascending ID, starting
// at $id, until 1000 pages have yielded a song title. The parsed fields are
// accumulated in $data, keyed by the song ID that was actually requested.

// Number of records collected so far
$j = 0;
// First song ID to request
$id = 200000;
// Collected records
$data = array();

// The request headers are the same for every page, so build them once.
// (Appending to the list inside the loop would resend every header one more
// time on each request.)
// No Accept-Encoding header is listed here: CURLOPT_ENCODING below makes cURL
// send one and transparently decompress the response.
// NOTE(review): the Cookie value is a hard-coded browser session and has
// presumably expired — confirm whether it is still needed.
$header = array(
    'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language:zh-CN,zh;q=0.8',
    'Cache-Control:max-age=0',
    'Connection:keep-alive',
    'Cookie:_unsign_token=a35437bd35c221c09a0e6f564e17c225; __gads=ID=7fcc242f6fd63d77:T=1408774454:S=ALNI_Mae8MH6vL5z6q4NlGYzyqgD4jHeEg; bdshare_firstime=1408774454639; _xiamitoken=3541aab48832ba3ceb089de7f39b9b0f; pnm_cku822=211n%2BqZ9mgNqgJnCG0Zu8%2BzyLTPuc%2B7wbrff98%3D%7CnOiH84T3jPCG%2FIr%2BiPOG8lI%3D%7CneiHGXz6UeRW5k4rRCFXIkcoTdd7ym3fZdO2FrY%3D%7Cmu6b9JHlkuGa5pDqnOie5ZDkmeqb4ZTule6V7ZjjlOib7JrmkvdX%7Cm%2B%2BT%2FGIUew96DXsUYBd4HawbrTOXOVI4iyOLIYUqT%2B9P%7CmO6BH2wDcB9rHGsYdwRrH2gfbAN%2FDH8QZBNkF3gDeQqqCg%3D%3D%7Cme6d7oHyneiH84Twn%2BmR64TzUw%3D%3D; CNZZDATA921634=cnzz_eid%3D1437506062-1408774274-%26ntime%3D1408937320; CNZZDATA2629111=cnzz_eid%3D2021816723-1408774274-%26ntime%3D1408937320; isg=075E6FBDF77039CEB63A1BA239420244',
    'Host:www.xiami.com',
    'User-Agent:Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1653.0 Safari/537.36',
);

// Collect 1000 records. IDs whose page yields no song title are skipped and
// do not count toward the total.
// NOTE(review): nothing bounds the number of attempts, so the loop never ends
// if pages stop matching (e.g. the site is unreachable or its markup changed).
while ($j < 1000) {
    // Capture the ID being fetched before advancing the counter, so the stored
    // record is keyed by the same ID that appears in the requested URL.
    $songId = $id++;
    $url = 'http://www.xiami.com/song/' . $songId;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);            // address to request
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);  // custom request headers
    curl_setopt($ch, CURLOPT_HEADER, 0);            // keep response headers out of the returned body
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);    // return the body as a string instead of printing it
    curl_setopt($ch, CURLOPT_ENCODING, '');         // accept every encoding cURL supports and decode it
    curl_setopt($ch, CURLOPT_TIMEOUT, 20);          // give up on a request after 20 seconds
    $content = curl_exec($ch);
    $curl_errno = curl_errno($ch);
    $curl_error = curl_error($ch);
    curl_close($ch);

    // Pause briefly after every request, including the ones skipped below.
    usleep(3000);

    // curl_exec() returns false on a transport failure; report it and move on
    // to the next ID instead of running the regexes against a non-string.
    if ($content === false) {
        error_log('cURL request for ' . $url . ' failed (' . $curl_errno . '): ' . $curl_error);
        continue;
    }

    // Song title, singer and album come from the page's meta description.
    preg_match('/name="description"\s+content="《(.+)》演唱者(.+),所属专辑《(.+)》/', $content, $matches);
    // Skip pages without a song title.
    if (empty($matches[1]) || trim($matches[1]) == '') {
        continue;
    }

    // The title is known to be non-empty here; the other fields fall back to
    // a single space when missing.
    $data[$songId]['song'] = $matches[1];
    $data[$songId]['songer'] = empty($matches[2]) ? ' ' : $matches[2];
    $data[$songId]['album'] = empty($matches[3]) ? ' ' : $matches[3];

    // First album ID linked from the page
    preg_match('/album\/(\d+)/', $content, $matches);
    $data[$songId]['albumId'] = empty($matches[1]) ? 0 : $matches[1];

    // First artist ID linked from the page
    preg_match('/\/artist\/(\d+)/', $content, $matches);
    $data[$songId]['songerId'] = empty($matches[1]) ? 0 : $matches[1];

    // Lyrics
    // NOTE(review): this pattern has no opening anchor, so it captures
    // everything from the start of the page up to the first </div>. The
    // opening tag was probably lost when the snippet was published — confirm
    // the intended pattern.
    preg_match('/(.*)<\/div>/Us', $content, $matches);
    $data[$songId]['lrc'] = empty($matches[1]) ? ' ' : addslashes($matches[1]);

    // Share count, rendered on the page as e.g. "分享(3269)"
    preg_match('/分享\((\d+)\)<\/em>/Us', $content, $matches);
    $data[$songId]['share'] = empty($matches[1]) ? 0 : $matches[1];

    // Comment count
    // NOTE(review): this matches the first run of digits directly before any
    // </span>; as with the lyrics pattern, an opening tag seems to be
    // missing — confirm.
    preg_match('/(\d+)<\/span>/Us', $content, $matches);
    $data[$songId]['comment_count'] = empty($matches[1]) ? 0 : $matches[1];

    // Database insert goes here
    //print_r($data);

    $j++;
}
// Self-chaining step: handles the song whose ID arrives as ?num=N, then
// requests this same script with N+1. The chain stops once the incremented
// value reaches 1001.

// Read "num" as an integer so only a number is ever placed in a URL; a
// missing or non-numeric value becomes 0.
$num = (isset($_GET['num']) && is_numeric($_GET['num'])) ? (int) $_GET['num'] : 0;

// Without a positive "num" there is nothing to process. Chaining on such a
// value would re-request the same URL forever, because it is never advanced.
if ($num < 1) {
    exit;
}

$url = 'http://www.xiami.com/song/' . $num;
// Your scraping code for $url goes here

$num++;

if ($num < 1001) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, 'http://localhost/caiji.php?num=' . $num);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // The short timeouts keep this request from waiting on the chained one.
    // NOTE(review): the chained request presumably has to keep running after
    // this client disconnects — confirm how the server handles aborted requests.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
    curl_setopt($ch, CURLOPT_TIMEOUT, 2);
    curl_exec($ch);
    curl_close($ch);
} else {
    exit;
}