当前位置:Gxlcms > PHP教程 > 路径中包含汉语言时,PHP解析失败

路径中包含汉语言时,PHP解析失败

时间:2021-07-01 10:21:17 帮助过:3人阅读

路径中包含中文时,PHP解析失败

header("Content-Type: text/html; charset=utf-8");
//static $count=0;

//抓包,识别ttx扩展名的文件,提取前100个字节,并插入数据库
function read100Bytes(&$extAndDir)
{
$conn=new mysqli("localhost","root","cai123","test");
if(mysqli_connect_errno())
{
die(mysqli_connect_error());
}
$conn->query("set names utf8");
$sql="insert into spiderTXT(file_name,file_link,file_100_byte) values(?,?,?)";
$stmt=$conn->prepare($sql);
$stmt->bind_param("sss",$file_name,$file_link,$file_100_byte);


$textFile=$extAndDir["text"];

foreach($textFile as $link)
{
if(strtolower(pathinfo($link)["extension"])=="txt")
{
if(file_exists($link) && is_readable($link))
{
$fd=fopen($link,"r") or die("打开文件失败".__line__);
$content=fread($fd,100);
//return $content;
$file_name=pathinfo($link)["basename"];
$file_link=pathinfo($link)["dirname"];
$file_100_byte=$content;
$stmt->execute();
$stmt->close();
fclose($fd);
}
else
{
die("文件不存在或者不可读".__line__);
}

}

}



$conn->close();
}
//根据扩展名分类
function extFilter(&$extAndDir,&$fileList,&$extFormat)
{
$ext=$extAndDir[0];
$link=$extAndDir[1];
if(in_array(strtolower($ext),$extFormat["img"],true))
{
array_push($fileList["img"],$link);
}
else if(in_array(strtolower($ext),$extFormat["video"],true))
{
array_push($fileList["video"],$link);
}
else if(in_array(strtolower($ext),$extFormat["sound"],true))
{
array_push($fileList["sound"],$link);
}
else if(in_array(strtolower($ext),$extFormat["text"],true))
{
array_push($fileList["text"],$link);
}
else if(in_array(strtolower($ext),$extFormat["pack"],true))
{
array_push($fileList["pack"],$link);
}
else if(in_array(strtolower($ext),$extFormat["execute"],true))
{
array_push($fileList["execute"],$link);
}
else
{
array_push($fileList["other"],$link);
}
}

//提取路径名,因为使用dirname无法提取完整的路径名,所以写了这个
function getDirName($path)
{
$pat="#(\.\w{2,3})$#";
//如果传的是路径+文件如 c:\sys.ini, 则提取路径名
if(preg_match($pat,$path,$match))
{
//echo "extension name: ".$match[1];
$pat2="#^(.*)/(\w{1,100}\.\w{2,3})$#i";
if(preg_match($pat2, $path,$mat))
{
return $mat[1];
}
else
{
echo "not match".__line__."
";
}
}
else
{
return $path;
}
}


//读取路径,并根据文件的扩展名分类
function readFileList($path,&$extFormat)
{
static $fileList=array("img"=>array(), "video"=>array(), "sound"=>array(),"text"=>array(),"pack"=>array(),"execute"=>array(),"other"=>array());
$encoding=mb_detect_encoding($path,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
//echo $encoding;
//exit();
//if (in_array($encoding,array("cp936","gbk" ,"gb2312"),true))
//{
$path=iconv($encoding,"utf-8",$path);
//}

$dirName=getDirName($path);
//$dirName=dirname($path);

//根据文件夹的名字,命名数组的关键字
/* $pat="#/(.+)$#i";
preg_match($pat, $dirName,$match);
$match=explode("/",$match[1]);
$packs=array_pop($match);
$fileList["$packs"]=null;
*/
if ($fd=opendir($dirName))
{
while($fileName=readdir($fd))
{
$encoding=mb_detect_encoding($fileName,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
//echo $encoding;
//exit();
//if (in_array($encoding,array("cp936","gbk" ,"gb2312"),true))
//{
$fileName=iconv($encoding,"utf-8",$fileName);
//}

if($fileName !="." && $fileName !="..")
{

if(is_file($dirName.'/'.$fileName))
{
@$extName=pathinfo($path."/".$fileName)["extension"];
$linkName=$dirName."/".$fileName;

$extAndDir=array($extName,$linkName);
extFilter($extAndDir,$fileList,$extFormat);
//read100Bytes($extAndDir);

}
else if(is_dir($dirName.'/'.$fileName))
{
//$fileName=iconv("gb2312","utf-8",$fileName);
readFileList($dirName.'/'.$fileName,$extFormat);
//echo $dirName.'/'.$fileName."
";
//die("读取目录出错");
}
}

}
return $fileList;
}
@closedir($fd);
}
//迭代数据
function iter(&$arr)
{
if(!is_array($arr))
return;
//$num=count($arr);
$list=array();
//echo "length".$num;
foreach($arr as $name=>$val)
{
foreach($val as $item)
array_push($list,$item);
}
return $list;
}

//组装批量插入数据
function mysqliInsert(&$arr)
{
$conn= new MySQLi("localhost", "root", "cai123", "test");
if(!$conn)
{
die("连接服务器失败".$conn->error);
}
$conn->query("set names utf8");
$sql="INSERT INTO img(link) VALUES";
foreach ($arr as $key=>$val)
{
foreach($val as $ads)
{
$sql .= "('".$ads."'),";

}
}
//去掉最后一个逗号,
$sql=substr($sql,0, strlen($sql)-1);
//echo $sql;

$res = $conn->query($sql);

if(!$res)
{
die("插入数据库失败".$conn->error);
}
else
{
if($conn->affected_rows>0)
{
exit();
}
else
{
die("没有产生影响");
}
}
$conn->close();

}
//预处理批量插入数据
function stmtInsert(&$arr)
{
$conn= new mysqli("localhost", "root", "cai123", "test");
if(mysqli_connect_errno())
{
die(mysqli_connect_error());
}
$conn->query("set names utf8");
$sql="insert into img(link) values(?)";
$stmt=$conn->prepare($sql);
$stmt->bind_param("s",$items);

foreach($arr as $key=>$links)
{
foreach ($links as $item)
{
$items=$item;
$b=$stmt->execute();
if(!$b)
{
die($conn->error);
}
}
}
$stmt->close();
$conn->close();
}
//
function showFile()
{
$conn= new MySQLi("localhost", "root", "cai123", "test");
if(!$conn)
{
die("连接服务器失败".$conn->error);
}
$conn->query("set names utf8");
$sql=" SELECT * FROM img GROUP BY link";
$res=$conn->query($sql);
if(!$res)
{
die("查询失败");
}
while($row=$res->fetch_assoc())
{
echo "
";
}
$res->free();
}

//转换window环境下路径的默认分隔符\为PHP识别更好的/
function transPathSep($path)
{
//$encoding=mb_detect_encoding($path,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
//$path=iconv($encoding,"utf-8",$path);

$system=$_SERVER["SERVER_SOFTWARE"];
$pat="#\((.*?)\)#";
$sysVer=null;
if(preg_match($pat,$system,$match))
{
$sysVer=$match[1];
}
else
{
die("匹配系统类型失败");
}
if(strtolower($sysVer)=="win32")
{
$realPath=str_replace("\\","/",$path);
return $realPath;
}
}

//static $fileList=array("img"=>array(), "video"=>array(), "sound"=>array(),"text"=>array(),"pack"=>array());
//在此处可以决定把什么类型的文件,归类到哪种类型
$imgFormat=array("img"=>array("jpg", "jpeg","png", "bmp","gif","ico"),
"video"=>array("avi","dvix","div","xvid","mpge","mpg","dat","wmv","asx","rm","rmvb","mov","qt","3gp","3g2","mp4","m4v","flv"),
"sound"=>array("mp3","wav","wma","ape","mod","aiff","voc","vov","asf"),
"text"=>array("word","txt","pdf","chm","ppt","ini","html","css","js"),
"pack"=>array("rar" ,"zip","7z","cab","arj","lzh","tar","gz","ace","uue","bz2","jar","iso","mpq"),
"execute"=>array("exe","bat","msi"));



/* 遇到一个问题,当路径中出现中文时,会提示打开目录失败,且显示为乱码,比如$dir="E:\Books\php\php 程序设计";时会出现,CSDN的可用分只有33了..............蛋疼
*/
$dir="C:\Windows\Boot";

$path=transPathSep($dir);

//echo pathinfo($path)["dirname"];
$arr=readFileList($path,$imgFormat);

echo "
";
print_r($arr);
echo "
";
//read100Bytes($arr);

echo "
OK";
?>




------解决思路----------------------
你这样能不出错吗?
while($fileName=readdir($fd))
{
$encoding=mb_detect_encoding($fileName,array("cp936","gbk" ,"gb2312", "utf-8","ISO-8859-1","ASCII"));
$fileName=iconv($encoding,"utf-8",$fileName); //非utf-8的文件名被转成了utf-8的,那么文件还能存在吗?

if($fileName !="." && $fileName !="..")
{

if(is_file($dirName.'/'.$fileName))

人气教程排行