php截取utf8或gbk编码中英文字符串
时间:2021-07-01 10:21:17
帮助过:22人阅读
// Usage example: measure a mixed string.
// "＠" is the full-width at sign (U+FF20, counted as 2); "@" is the ASCII at sign (counted as 1).
$a = "s＠@你好";
var_dump(strlen_weibo($a, 'utf-8')); // int(8): 1 (s) + 2 (＠) + 1 (@) + 4 (two Chinese characters)
结果输出为 8：其中字母 s 计数为 1，全角“＠”计数为 2，半角“@”计数为 1，两个中文字符计数为 4。源码如下：
/**
 * Compute the "Weibo-style" length of a string: every single-byte character
 * counts as 1 and every multi-byte character counts as 2.
 *
 * In UTF-8 mode the width of each character is derived from its lead byte.
 * Bytes that cannot start a multi-byte sequence (ASCII, control characters,
 * stray continuation bytes, 0xC0/0xC1, 0xFE/0xFF) are consumed one at a time
 * and count as 1. The legacy 5- and 6-byte lead bytes (0xF8-0xFD) are still
 * skipped as whole sequences so results match the historical behaviour.
 *
 * In any other charset the string is treated as a double-byte encoding such
 * as GBK: a byte above 127 together with the byte that follows it counts as 2,
 * every other byte counts as 1.
 *
 * @param string $string  Raw bytes to measure.
 * @param string $charset 'utf-8' or 'utf8' (case-insensitive) selects UTF-8
 *                        handling; anything else selects double-byte handling.
 *
 * @return int The weighted length.
 */
function strlen_weibo($string, $charset = 'utf-8')
{
    // Cast so that null / numeric input is measured as text instead of
    // triggering offset warnings.
    $string = (string) $string;
    $length = strlen($string);
    $count = 0;

    // Accept the common 'utf8' spelling as well; previously it silently fell
    // through to the double-byte branch and over-counted UTF-8 text.
    $normalizedCharset = strtolower(trim((string) $charset));
    $isUtf8 = in_array($normalizedCharset, array('utf-8', 'utf8'), true);

    if (!$isUtf8) {
        for ($i = 0; $i < $length; $i++) {
            if (ord($string[$i]) > 127) {
                // Lead byte of a double-byte character: skip its trail byte
                // and add the extra unit for the wide character.
                $i++;
                $count++;
            }
            $count++;
        }

        return $count;
    }

    $offset = 0;
    while ($offset < $length) {
        $leadByte = ord($string[$offset]);

        // Number of bytes the sequence occupies, judged by the lead byte only.
        if ($leadByte >= 194 && $leadByte <= 223) {
            $width = 2;
        } elseif ($leadByte >= 224 && $leadByte <= 239) {
            $width = 3;
        } elseif ($leadByte >= 240 && $leadByte <= 247) {
            $width = 4;
        } elseif ($leadByte >= 248 && $leadByte <= 251) {
            $width = 5;
        } elseif ($leadByte === 252 || $leadByte === 253) {
            $width = 6;
        } else {
            $width = 1;
        }

        // A truncated trailing sequence simply moves the offset past the end,
        // which terminates the loop.
        $offset += $width;
        $count += ($width === 1) ? 1 : 2;
    }

    return $count;
}
|