效率非常可观,你要是改成其他用处那效率我就不保证了
- /* vim: set expandtab tabstop=4 shiftwidth=4: */
- // +------------------------------------------------------------------------
- // Name : 权重计算
- // Description: 稍加修改,亦可用于分词,词频统计,全文检索和垃圾检测
- // Date : 2013/12/16 08:51
- // Authors : latel
- // +------------------------------------------------------------------------
- //
- /*外部调用示例*/
- /*
- $aItems = array(
- 'chinaisbig',
- 'whichisnot',
- 'totalyrightforme',
- );
- $aTable = array(
- 'china,is|small',
- 'china,big|me',
- 'china,is|big,which|not,me',
- 'totaly|right,for,me',
- );
- $oWeight = new weight;
- $oWeight->newItems($aItems);
- $oWeight->newTable($aTable);
- $aResult = $oWeight->getShow('max');
- */
/**
 * Keyword weighting backed by a byte-wise trie.
 *
 * A "table" is a list of lines; each line is a set of words separated by
 * ',' or '|'. Every word is inserted into the trie and tagged with the key of
 * the table line it came from. An "item" is a string that is scanned for
 * those words; the table keys of every word found are counted, and
 * getShow('max') reports, per item, the table key(s) hit most often.
 *
 * Typical use: newItems(), newTable(), then getShow().
 */
class weight {
    /**
     * Trie nodes. Node 0 is the root. Each node maps a single byte to the
     * index of its child node; an accepting node additionally carries an
     * 'acc' entry listing the table keys of the words that end there.
     */
    protected $aDict = array(array());
    /** Items to be scanned, keyed as supplied by the caller. */
    protected $aItems = array();
    /** Rule used to build the cached $aShow, or null when nothing is cached. */
    protected $sLastRule;
    /** Per-item hit counts (table key => number of hits) from the last genShow(). */
    protected $aMatchs = array();
    /** Cached result of the last genShow(). */
    protected $aShow = array();

    /**
     * Drop every cached result so the next getShow() recomputes.
     *
     * The properties are reset to empty values rather than unset, so they
     * stay defined for any later read.
     */
    private function init() {
        $this->aShow = array();
        $this->aMatchs = array();
        $this->sLastRule = null;
    }

    /**
     * Replace the items to be scanned.
     *
     * @param mixed $mItems An array of strings, or a single value which is
     *                      wrapped into a one-element array.
     */
    public function newItems($mItems) {
        $this->aItems = is_array($mItems) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Add the words of a lookup table to the dictionary.
     *
     * Words are added to whatever the dictionary already contains; calling
     * this method twice accumulates both tables.
     *
     * @param array $aTable Table lines; words within a line are separated by
     *                      ',' or '|'. The array key of a line is the value
     *                      reported for hits on its words.
     */
    public function newTable(array $aTable) {
        foreach ($aTable as $iTableKey => $sTableLine) {
            // '|' and ',' are equivalent separators.
            $aWords = explode(',', str_replace('|', ',', $sTableLine));
            foreach ($aWords as $sWord) {
                $this->genDict($sWord, $iTableKey);
            }
        }
        $this->init();
    }

    /**
     * Return the result for the given rule, reusing the cached one if possible.
     *
     * @param string $sRule Selection rule; only 'max' is implemented.
     * @return array Item key => list of table keys. Empty array when there
     *               are no items or the dictionary holds no words.
     */
    public function getShow($sRule = 'max') {
        // The dictionary always contains the root node, so "no words" means
        // fewer than two nodes.
        if (empty($this->aItems) || count($this->aDict) < 2) {
            return array();
        }
        if (empty($this->aShow) || $sRule !== $this->sLastRule) {
            return $this->genShow($sRule);
        }
        return $this->aShow;
    }

    /**
     * Scan every item and build the result for the given rule.
     *
     * Stores the per-item hit counts in $aMatchs and the result in $aShow.
     *
     * @param string $sRule Selection rule. 'max' selects, per item, every
     *                      table key whose hit count equals the highest count.
     *                      Any other value yields an empty result.
     * @return array Item key => list of selected table keys (an empty list
     *               for an item that matched nothing).
     */
    public function genShow($sRule) {
        $aShow = array();
        $aMatchs = array();
        foreach ($this->aItems as $mItemKey => $sItem) {
            $aCounts = array_count_values($this->matchWord($sItem));
            $aMatchs[] = $aCounts;
            switch ($sRule) {
                case 'max':
                    // max() rejects an empty array, so an item without any
                    // hit gets an empty list directly.
                    $aShow[$mItemKey] = empty($aCounts)
                        ? array()
                        : array_keys($aCounts, max($aCounts), true);
                    break;
            }
        }
        $this->aShow = $aShow;
        $this->aMatchs = $aMatchs;
        // Remember the rule so getShow() can serve the cached result.
        $this->sLastRule = $sRule;
        return $aShow;
    }

    /**
     * Insert one word into the trie and tag its final node with a table key.
     *
     * @param mixed $mWord Word to insert; handled byte by byte. Empty words
     *                     are ignored.
     * @param mixed $iKey  Table key recorded on the accepting node.
     */
    private function genDict($mWord, $iKey = '') {
        $sWord = (string) $mWord;
        if ($sWord === '') {
            // Nothing to index (e.g. two adjacent separators in a table line).
            return;
        }
        $iInsertPoint = count($this->aDict);
        $iCur = 0; // current node, starting at the root
        foreach (str_split($sWord) as $sChar) {
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                continue;
            }
            // No edge for this byte yet: append a node and link it.
            $this->aDict[$iInsertPoint] = array();
            $this->aDict[$iCur][$sChar] = $iInsertPoint;
            $iCur = $iInsertPoint;
            $iInsertPoint++;
        }
        $this->aDict[$iCur]['acc'][] = $iKey;
    }

    /**
     * Scan a string and collect the table keys of every dictionary word found.
     *
     * Scanning is left to right and stops at the first accepting node it
     * reaches, so a word that has a shorter dictionary word as a prefix is
     * never reported. After a hit, scanning resumes right behind the matched
     * word; after a failed attempt it resumes one byte after the attempt's
     * start.
     *
     * @param mixed $sLine Text to scan; handled byte by byte.
     * @return array List of table keys, one entry per hit and per tagged key,
     *               in order of appearance.
     */
    public function matchWord($sLine) {
        $sLine = (string) $sLine;
        $iLen = strlen($sLine);
        $aReturn = array();
        $iStart = 0;
        while ($iStart < $iLen) {
            $iCur = 0;
            // Unless a word is found, the next attempt starts one byte later.
            $iNext = $iStart + 1;
            for ($iOffset = $iStart; $iOffset < $iLen; $iOffset++) {
                $sChar = $sLine[$iOffset];
                if (!isset($this->aDict[$iCur][$sChar])) {
                    break;
                }
                $iCur = $this->aDict[$iCur][$sChar];
                if (isset($this->aDict[$iCur]['acc'])) {
                    foreach ($this->aDict[$iCur]['acc'] as $mKey) {
                        $aReturn[] = $mKey;
                    }
                    $iNext = $iOffset + 1;
                    break;
                }
            }
            $iStart = $iNext;
        }
        return $aReturn;
    }
}
- ?>
|