当前位置:Gxlcms > PHP教程 > php爬虫

PHP爬虫

时间:2021-07-01 10:21:17 帮助过:14人阅读

  <?php
  /**
   * Crawler for http://i8i8.cc/.
   *
   * Flow:
   *   1. Fetch the home page and collect the category links /cclass/{9..15}_1.html.
   *   2. Fetch each category page and collect the movie entries listed on it.
   *   3. Fetch each movie detail page and extract name, actor, category, year,
   *      update time and description with regular expressions.
   *
   * The extracted fields are only held in local variables; nothing is written to
   * the database (the category insert is disabled, see the note at the end of
   * the category loop).
   */
  header("Content-Type:text/html; charset=gb2312");

  /**
   * Fetch a URL with cURL and return the response body.
   *
   * @param string $url Absolute URL to request.
   * @return string|false Response body, or false when the handle could not be
   *                      created or the transfer failed.
   */
  function crawler_fetch($url)
  {
      $handle = curl_init();
      if ($handle === false) {
          return false;
      }
      curl_setopt($handle, CURLOPT_URL, $url);
      // Return the body as a string instead of printing it directly.
      curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
      $body = curl_exec($handle);
      curl_close($handle);

      return $body;
  }

  $base = "http://i8i8.cc";

  // Step 1: category links on the home page.
  // Group 2 is the category path, group 6 is the link text.
  $regx1 = '/(<a href=")(\/cclass\/((9|10|11|12|13|14|15)_1\.html))(">)(.*)(<\/a>)/';
  $result = array();
  $output1 = crawler_fetch($base . "/");
  if ($output1 !== false) {
      preg_match_all($regx1, $output1, $result, PREG_SET_ORDER);
  }

  // The legacy mysql_* extension was removed in PHP 7.0, so mysqli is used.
  // Credentials are quoted strings; as bare words they were undefined constants.
  $conn = mysqli_connect('localhost', 'test', '111111', 'test');
  if ($conn === false) {
      exit('Database connection failed: ' . mysqli_connect_error());
  }
  // Sets the connection charset (replaces the raw "SET NAMES gb2312" query).
  mysqli_set_charset($conn, 'gb2312');

  // Patterns for the listing and detail pages; they do not change per iteration.
  // NOTE(review): the patterns containing Chinese literals only match if this
  // file is saved in the same encoding as the fetched pages - confirm.
  $regx2 = '/(<li>)(<a href=")(.*\.html)(.*)(<img src=")(.*)(" alt=)(.*)(<span>)(.*)(<\/span>)/';
  $regx_name = '/(<div class=")(row_right)("><strong>)(.*)<\/strong>/';
  $regx_actor = '/(<div class="img">)(.*)(artlist&keyword=)(.*)(>)(.*)(<\/a>)/';
  $regx_catalog = '/(<div class="row_right"><A href="\/cclass\/(.*).html">)(.*)(<\/A> )/';
  $regx_year = '/<a href="\/year\/(.*).html">(.*)<\/a>/';
  $regx_update = '/(更新时间:<\/div><div class="row_right">)(.*)<\/div>/';
  $regx_desc = '/(主演。)(.*)<\/div>/';

  foreach ($result as $key => $value) {
      // Step 2: movie entries on the category page.
      $output2 = crawler_fetch($base . $value[2]);
      if ($output2 === false) {
          // Skip categories whose page could not be downloaded.
          continue;
      }
      $movie = array();
      preg_match_all($regx2, $output2, $movie, PREG_SET_ORDER);

      foreach ($movie as $k => $v) {
          // Group 3 is the href of the detail page. Its first two characters are
          // dropped before prefixing the host - presumably a leading ".." in the
          // relative link; verify against the live markup.
          $url3 = $base . substr($v[3], 2);

          // Step 3: fields on the movie detail page.
          $output3 = crawler_fetch($url3);
          if ($output3 === false) {
              // Skip movies whose detail page could not be downloaded.
              continue;
          }

          preg_match_all($regx_name, $output3, $movie_name, PREG_SET_ORDER);
          preg_match_all($regx_actor, $output3, $movie_actor, PREG_SET_ORDER);
          preg_match_all($regx_catalog, $output3, $movie_catalog, PREG_SET_ORDER);
          // Group 2 holds the captured value for each of the next three patterns.
          preg_match_all($regx_year, $output3, $movie_year, PREG_SET_ORDER);
          preg_match_all($regx_update, $output3, $movie_update, PREG_SET_ORDER);
          preg_match_all($regx_desc, $output3, $movie_desc, PREG_SET_ORDER);
      }

      // Storing the category ($key and the link text $value[6]) in t_catalog is
      // left disabled, as in the original listing. If it is enabled, bind the
      // values through mysqli_prepare() / mysqli_stmt_bind_param() instead of
      // interpolating scraped text into the SQL string.
  }

  mysqli_close($conn);
  ?>

人气教程排行