时间:2021-07-01 10:21:17 帮助过:37人阅读
<?php
/**
 * Build a Word-readable document from an HTML string.
 *
 * The result is really an MHT (MIME HTML) archive: the HTML is stored as the
 * first part under the name "tmp.html", and every image referenced by an
 * <img> tag is fetched and embedded as a further part. Saving the returned
 * string with a .doc extension gives a file that Word can open.
 *
 * Only <img> tags whose src value is wrapped in single or double quotes are
 * recognised; unquoted src values are ignored. Each distinct image is
 * embedded once, under the path exactly as written in the HTML, so the
 * references inside the stored page keep resolving.
 *
 * Images that cannot be read are reported with an echoed message and skipped;
 * the document is still produced.
 *
 * This function depends on the MhtFileMaker class defined below, which must
 * be loaded before the function is called.
 *
 * Usage:
 *
 *     $fileContent = getWordDocument($content, "http://www.yoursite.com/Music/etc/");
 *     $fp = fopen("test.doc", 'w');
 *     fwrite($fp, $fileContent);
 *     fclose($fp);
 *
 * Here $content is the HTML source and the second argument is the URL that
 * completes the relative image paths found in that HTML.
 *
 * @param string $content      HTML source.
 * @param string $absolutePath Base URL or directory prepended to relative
 *                             image paths. Must end with "/".
 * @param bool   $isEraseLink  Whether to strip <a> tags, keeping their inner
 *                             content.
 *
 * @return string The complete MHT document.
 */
function getWordDocument($content, $absolutePath = "", $isEraseLink = true)
{
    $mht = new MhtFileMaker();

    if ($isEraseLink) {
        // \b stops "<a" from also matching "<abbr", "<article", ...; the "s"
        // flag lets an anchor span several lines.
        $withoutLinks = preg_replace('/<a\b[^>]*>(.*?)<\/a>/is', '$1', $content);
        if ($withoutLinks !== null) {
            $content = $withoutLinks;
        }
    }

    // Map of "src exactly as written" => "location to read the image from".
    $images = array();
    $matches = array();
    // The whitespace required before "src" keeps attributes such as
    // data-src from being mistaken for the real one.
    if (preg_match_all('/<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1/is', $content, $matches)) {
        foreach ($matches[2] as $src) {
            $src = trim($src);
            if ($src === '' || isset($images[$src])) {
                continue;
            }
            // Absolute http(s) URLs are used as they are; everything else is
            // resolved against the supplied base.
            $images[$src] = preg_match('#^https?://#i', $src) ? $src : $absolutePath . $src;
        }
    }

    $mht->AddContents("tmp.html", $mht->GetMimeType("tmp.html"), $content);

    foreach ($images as $src => $location) {
        // Warnings are suppressed because a failed read is reported below.
        $data = @file_get_contents($location);
        if ($data === false) {
            echo "file:" . $location . " not exist!<br />";
            continue;
        }
        // Numeric-looking array keys come back as integers, hence the cast.
        $mht->AddContents((string) $src, $mht->GetMimeType($location), $data);
    }

    return $mht->GetFile();
}

/***********************************************************************
Class: Mht File Maker
Version: 1.2 beta
Date: 02/11/2007
Author: Wudi <wudicgi@yahoo.de>
Description: The class can make .mht file.
***********************************************************************/

/**
 * Assembles an MHT (MIME HTML, multipart/related) document from a list of
 * parts. Parts are added with AddContents(), AddFile(), AddDir() or
 * AutoAddFiles(); GetFile() returns the finished document and MakeFile()
 * writes it to disk. The first part added is treated as the root page.
 */
class MhtFileMaker
{
    /** @var array Options passed to the constructor; not read by this class. */
    public $config = array();

    /** @var array Raw top-level header lines, in insertion order. */
    public $headers = array();

    /** @var array Lower-cased names of the headers that have been set. */
    public $headers_exists = array();

    /** @var array Parts; each has keys filepath, mimetype, filecont, encoding. */
    public $files = array();

    /** @var string|null Multipart boundary; generated on demand when unset. */
    public $boundary;

    /** @var string|null Normalised base directory used by AutoAddFiles(). */
    public $dir_base;

    /** @var string|null Normalised path of the page stored as first part. */
    public $page_first;

    /**
     * @param array $config Optional settings, stored in $config as given.
     */
    public function __construct($config = array())
    {
        $this->config = $config;
    }

    /**
     * Legacy no-op kept for backward compatibility. It was presumably meant
     * as the constructor of an earlier "MhtFile" class name.
     *
     * @param array $config Ignored.
     */
    public function MhtFile($config = array())
    {
    }

    /**
     * Append a raw header line and remember that its name has been set.
     *
     * @param string $header Full header line, e.g. "Subject: Hello".
     */
    public function SetHeader($header)
    {
        $this->headers[] = $header;
        $colon = strpos($header, ':');
        $name = ($colon === false) ? $header : substr($header, 0, $colon);
        $this->headers_exists[strtolower($name)] = true;
    }

    /**
     * @param string $from Value of the From header.
     */
    public function SetFrom($from)
    {
        $this->SetHeader("From: $from");
    }

    /**
     * @param string $subject Value of the Subject header.
     */
    public function SetSubject($subject)
    {
        $this->SetHeader("Subject: $subject");
    }

    /**
     * Set the Date header.
     *
     * @param int|string|null $date        Unix timestamp when $istimestamp is
     *                                     true, otherwise a ready-made date
     *                                     string. The current time is used
     *                                     when omitted.
     * @param bool            $istimestamp Whether $date must be formatted.
     */
    public function SetDate($date = null, $istimestamp = false)
    {
        if ($date === null || $date === '') {
            // The fallback is a timestamp, so it always needs formatting;
            // otherwise the header would carry a bare integer.
            $date = time();
            $istimestamp = true;
        }
        if ($istimestamp) {
            $date = date('D, d M Y H:i:s O', (int) $date);
        }
        $this->SetHeader("Date: $date");
    }

    /**
     * Set the multipart boundary, generating a random one when none is given.
     *
     * @param string|null $boundary Boundary string to use.
     */
    public function SetBoundary($boundary = null)
    {
        if ($boundary === null || $boundary === '') {
            $this->boundary = '--' . strtoupper(md5(mt_rand())) . '_MULTIPART_MIXED';
        } else {
            $this->boundary = $boundary;
        }
    }

    /**
     * @param string $dir Directory whose files AutoAddFiles() will collect.
     */
    public function SetBaseDir($dir)
    {
        $this->dir_base = $this->NormalizePath($dir);
    }

    /**
     * @param string $filename Root page, relative to the base directory.
     */
    public function SetFirstPage($filename)
    {
        $this->page_first = $this->NormalizePath("{$this->dir_base}/$filename");
    }

    /**
     * Add the first page and then every other file under the base directory.
     * Terminates the script if no usable first page has been set.
     */
    public function AutoAddFiles()
    {
        if (empty($this->page_first)) {
            exit('Not set the first page.');
        }
        $this->AddFile($this->page_first, $this->MakeLocation($this->page_first), null);
        $this->AddDir($this->dir_base);
    }

    /**
     * Recursively add every file below $dir except the first page.
     *
     * @param string $dir Directory to walk.
     */
    public function AddDir($dir)
    {
        $handle = opendir($dir);
        if ($handle === false) {
            return;
        }
        // Compare against false explicitly: an entry named "0" is falsy and
        // would otherwise end the loop early.
        while (($filename = readdir($handle)) !== false) {
            if ($filename === '.' || $filename === '..') {
                continue;
            }
            $path = "$dir/$filename";
            if ($path === $this->page_first) {
                continue;
            }
            if (is_dir($path)) {
                $this->AddDir($path);
            } elseif (is_file($path)) {
                $this->AddFile($path, $this->MakeLocation($path), null);
            }
        }
        closedir($handle);
    }

    /**
     * Read a file and add it as a part.
     *
     * @param string      $filename File to read.
     * @param string|null $filepath Content-Location to store; defaults to
     *                              $filename.
     * @param string|null $encoding Transfer encoding the data already has, or
     *                              null to base64-encode it.
     *
     * @return bool False when the file could not be read (nothing is added).
     */
    public function AddFile($filename, $filepath = null, $encoding = null)
    {
        if ($filepath == null) {
            $filepath = $filename;
        }
        $filecont = file_get_contents($filename);
        if ($filecont === false) {
            return false;
        }
        $this->AddContents($filepath, $this->GetMimeType($filename), $filecont, $encoding);
        return true;
    }

    /**
     * Add a part from data held in memory.
     *
     * @param string      $filepath Content-Location of the part.
     * @param string      $mimetype Content-Type of the part.
     * @param string      $filecont Part body.
     * @param string|null $encoding Transfer encoding $filecont already has;
     *                              when null the body is base64-encoded here.
     */
    public function AddContents($filepath, $mimetype, $filecont, $encoding = null)
    {
        if ($encoding == null) {
            $filecont = chunk_split(base64_encode($filecont), 76);
            $encoding = 'base64';
        }
        $this->files[] = array(
            'filepath' => $filepath,
            'mimetype' => $mimetype,
            'filecont' => $filecont,
            'encoding' => $encoding,
        );
    }

    /**
     * Make sure a Date header and a boundary exist before output.
     */
    public function CheckHeaders()
    {
        if (!array_key_exists('date', $this->headers_exists)) {
            $this->SetDate(null, true);
        }
        if ($this->boundary == null) {
            $this->SetBoundary();
        }
    }

    /**
     * @return bool Whether at least one part has been added.
     */
    public function CheckFiles()
    {
        return count($this->files) > 0;
    }

    /**
     * Render the complete MHT document.
     * Terminates the script if no part has been added.
     *
     * @return string
     */
    public function GetFile()
    {
        $this->CheckHeaders();
        if (!$this->CheckFiles()) {
            exit('No file was added.');
        }
        $contents = implode("\r\n", $this->headers);
        $contents .= "\r\n";
        $contents .= "MIME-Version: 1.0\r\n";
        $contents .= "Content-Type: multipart/related;\r\n";
        $contents .= "\tboundary=\"{$this->boundary}\";\r\n";
        // The first part is the root page, so its type is the archive type.
        $contents .= "\ttype=\"" . $this->files[0]['mimetype'] . "\"\r\n";
        $contents .= "X-MimeOLE: Produced By Mht File Maker v1.0 beta\r\n";
        $contents .= "\r\n";
        $contents .= "This is a multi-part message in MIME format.\r\n";
        $contents .= "\r\n";
        foreach ($this->files as $file) {
            $contents .= "--{$this->boundary}\r\n";
            $contents .= "Content-Type: {$file['mimetype']}\r\n";
            $contents .= "Content-Transfer-Encoding: {$file['encoding']}\r\n";
            $contents .= "Content-Location: {$file['filepath']}\r\n";
            $contents .= "\r\n";
            $contents .= $file['filecont'];
            $contents .= "\r\n";
        }
        $contents .= "--{$this->boundary}--\r\n";
        return $contents;
    }

    /**
     * Render the document and write it to a file.
     *
     * @param string $filename Destination path.
     *
     * @return bool Whether the file was written.
     */
    public function MakeFile($filename)
    {
        return file_put_contents($filename, $this->GetFile()) !== false;
    }

    /**
     * Guess a MIME type from a file name or URL by its extension.
     * The match is case-insensitive, and for URLs any query string or
     * fragment is ignored.
     *
     * @param string $filename File name, path or URL.
     *
     * @return string MIME type; "application/octet-stream" when unknown.
     */
    public function GetMimeType($filename)
    {
        static $types = array(
            'htm'  => 'text/html',
            'html' => 'text/html',
            'txt'  => 'text/plain',
            'cgi'  => 'text/plain',
            'php'  => 'text/plain',
            'css'  => 'text/css',
            'jpg'  => 'image/jpeg',
            'jpeg' => 'image/jpeg',
            'jpe'  => 'image/jpeg',
            'gif'  => 'image/gif',
            'png'  => 'image/png',
        );

        $path = (string) $filename;
        if (strpos($path, '://') !== false) {
            // Only URLs are parsed, so local names containing "?" or "#"
            // keep their extension.
            $urlPath = parse_url($path, PHP_URL_PATH);
            $path = is_string($urlPath) ? $urlPath : '';
        }
        $extension = strtolower((string) pathinfo($path, PATHINFO_EXTENSION));

        return isset($types[$extension]) ? $types[$extension] : 'application/octet-stream';
    }

    /**
     * Resolve a path and convert backslashes to forward slashes.
     *
     * @param string $path Path to normalise.
     *
     * @return string Normalised path, or "" when it does not exist.
     */
    private function NormalizePath($path)
    {
        return str_replace("\\", "/", (string) realpath($path));
    }

    /**
     * Turn a path under the base directory into its Content-Location URL.
     * Only a leading base-directory prefix is removed.
     *
     * @param string $path Normalised absolute path.
     *
     * @return string URL on the placeholder host "mhtfile".
     */
    private function MakeLocation($path)
    {
        $base = rtrim((string) $this->dir_base, '/');
        if ($base !== '' && strpos($path, $base . '/') === 0) {
            $path = substr($path, strlen($base));
        }
        return 'http://mhtfile' . $path;
    }
}