爬小说站
时间:2021-07-01 10:21:17
帮助过:12人阅读
给老婆大人看小说用的。原始站点总是弹窗,弹窗就弹窗吧,还有声音,声音还竟是那种的。
虽说可以用chrome屏蔽掉,也可以写hosts,但是换个站点还得重新弄,怪麻烦的。
// Crawl a multi-page novel: start at $base . $start, extract the text of each
// page with $content_grep, append it to $file_name, then follow the page's
// "下一页" (next page) link until no such link is found.

// Base URL that every page name is appended to.
$base = 'http://xx/oo/';
// Page name of the first chapter.
$start = 'xx.shtml';
// Pattern whose first capture group is one line of page text.
// NOTE(review): this pattern looks like it lost HTML markup when the snippet
// was published (it now matches "space, text, newline"); it is kept
// byte-for-byte here -- confirm against the target page before use.
$content_grep = '/ (.*)
/';
// Pattern whose first capture group is the page name of the next page.
$next_grep = '/href=\'(\d+\.shtml)\'>下一页/';
$next = $start;
// Output file; every page is appended to it.
$file_name = 'out.txt';

while ($next) {
    echo 'getting ' . $next . PHP_EOL;
    $result = file_get_contents($base . $next);
    if ($result === false) {
        // Fetch failed: stop instead of running the patterns against `false`.
        echo 'failed to fetch ' . $base . $next . PHP_EOL;
        break;
    }

    preg_match_all($content_grep, $result, $match);
    $lines = isset($match[1]) ? $match[1] : array();

    // The first captured line is treated as the title and written without
    // the leading indent; every following line is prefixed with ' '.
    $content = "";
    foreach ($lines as $index => $line) {
        $indent = ($index === 0) ? '' : ' ';
        $content .= $indent . $line . PHP_EOL . PHP_EOL;
    }

    $file = fopen($file_name, 'a');
    if ($file === false) {
        // Cannot open the output file: stop rather than call fwrite on false.
        echo 'failed to open ' . $file_name . PHP_EOL;
        break;
    }
    echo 'write length: ' . strlen($content) . PHP_EOL;
    fwrite($file, $content);
    fclose($file);
    echo '.';

    // Only read the capture when the next-page link actually matched;
    // otherwise end the loop without touching an undefined index.
    if (preg_match($next_grep, $result, $match) === 1) {
        $next = $match[1];
    } else {
        $next = null;
    }
}
|