时间:2021-07-01 10:21:17 帮助过:4人阅读
/**
 * Extract one HTML element, from its opening tag through its matching closing tag.
 *
 * The element is located by plain text search: the first occurrence of $tag_id
 * in the document is found, and the nearest `<$tag` that starts at or before
 * that position is taken as the opening tag. Nested elements of the same name
 * are balanced by depth counting so the correct closing tag is used.
 *
 * This is a lightweight text scan, not an HTML parser: tags that appear inside
 * comments, scripts or attribute values are counted like any other tag.
 *
 * If the document declares a gb2312 or gbk charset it is converted to UTF-8
 * before searching, so the returned fragment is UTF-8 in that case.
 *
 * @param string       $tag_id Text identifying the element, e.g. 'id="content"'.
 * @param string       $tag    Element name to extract (matched case-insensitively).
 * @param string|false $data   HTML source to search.
 *
 * @return string|false The element markup, terminated with '</' . $tag . '>',
 *                      or false when the input is unusable, $tag_id is not
 *                      found, no opening tag precedes it, or the element is
 *                      never closed.
 */
function getWebTag($tag_id, $tag = 'div', $data = false)
{
    $tag_id = (string) $tag_id;
    $tag = (string) $tag;
    if (!is_string($data) || $data === '' || $tag_id === '' || $tag === '') {
        return false;
    }

    // Read the *declared* charset instead of searching the rest of the page for
    // an encoding name; only GB-encoded pages need converting.
    if (preg_match('/charset\s*=\s*["\']?\s*(gb2312|gbk)\b/i', $data, $charset)) {
        $converted = iconv($charset[1], 'UTF-8', $data);
        // iconv() returns false on undecodable input; keep the raw bytes then.
        if ($converted !== false && $converted !== '') {
            $data = $converted;
        }
    }

    // strpos() signals "not found" with false, never with -1.
    $hit = strpos($data, $tag_id);
    if ($hit === false) {
        return false;
    }

    // Collect opening and closing tags in document order with a single scan.
    // The lookahead stops `<div` from matching longer names such as `<divider`.
    $pattern = '~<(/?)' . preg_quote($tag, '~') . '(?=[\s/>])~i';
    if (!preg_match_all($pattern, $data, $tokens, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // The wanted element is the last opening tag that starts at or before the hit.
    $openIndex = null;
    foreach ($tokens as $i => $token) {
        if ($token[0][1] > $hit) {
            break;
        }
        if ($token[1][0] === '') {
            $openIndex = $i;
        }
    }
    if ($openIndex === null) {
        return false;
    }
    $start = $tokens[$openIndex][0][1];

    // Walk forward: each nested opening tag goes one level deeper, each closing
    // tag comes back up; a closing tag seen at depth 0 ends the wanted element.
    $depth = 0;
    $total = count($tokens);
    for ($i = $openIndex + 1; $i < $total; $i++) {
        if ($tokens[$i][1][0] === '') {
            $depth++;
        } elseif ($depth === 0) {
            return substr($data, $start, $tokens[$i][0][1] - $start) . '</' . $tag . '>';
        } else {
            $depth--;
        }
    }

    // Unbalanced markup: the element is never closed.
    return false;
}
调用示例
// Download the page; file_get_contents() returns false when the request fails.
$html = file_get_contents('http://www.baidu.com');
// Only parse when the download succeeded; otherwise report the failure as false.
$divContent = ($html === false) ? false : getWebTag('id="content"', 'div', $html);