当前位置:Gxlcms > mysql > 使用Coreseek-4.1快速搭建Sphinx中文分词Php-Mysql全文检索搜索引擎

使用Coreseek-4.1快速搭建Sphinx中文分词Php-Mysql全文检索搜索引擎

时间:2021-07-01 10:21:17 帮助过:49人阅读

CentOS-6.4 安装 Coreseek-4.1,使用 Sphinx 提升项目搜索功能的性能。本文只讲解如何在 Linux 下使用。安装 Coreseek-4.1:yum -y install glibc-common libtool autoconf automake mysql-devel expat-devel #如果不安装这些依赖,下面的 sh buildconf.sh 可能会报错!

CentOS-6.4 安装 Coreseek-4.1 使用 Sphinx 提升项目搜索功能的性能 本文只讲解如何在linux下使用:

安装Coreseek-4.1

# Install the build toolchain and the development headers used by the builds below.
yum -y install glibc-common libtool autoconf automake mysql-devel expat-devel
# Without these packages, the `sh buildconf.sh` step below may fail.
cd /data/src
# NOTE(review): assumes autoconf-2.64.tar.bz2 is already present in
# /data/software — no download step for it is shown here.
tar -xjf ../software/autoconf-2.64.tar.bz2
cd autoconf-2.64/
./configure
make && make install
cd ../
# Download the Coreseek 4.1 beta source into /data/software and unpack it under /data/src.
cd /data/software
wget http://www.coreseek.cn/uploads/csft/4.0/coreseek-4.1-beta.tar.gz
cd /data/src
tar zxf ../software/coreseek-4.1-beta.tar.gz
# Build mmseg and install it under /usr/local/mmseg3.
cd coreseek-4.1-beta/mmseg-3.2.14
./bootstrap
./configure --prefix=/usr/local/mmseg3
make && make install
cd ../
# Build csft against the mmseg headers/libs installed above and install it under
# /usr/local/coreseek. This first build is configured with --without-mysql.
cd /data/src/coreseek-4.1-beta/csft-4.1/
sh buildconf.sh
./configure --prefix=/usr/local/coreseek  --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --without-mysql
make && make install
cd ../
## Test mmseg word segmentation and Coreseek search (the locale must already be set to zh_CN.UTF-8 so that Chinese is displayed correctly)
cd testpack
cat var/test/test.xml    # Chinese text should be displayed correctly at this point
/usr/local/mmseg3/bin/mmseg -d /usr/local/mmseg3/etc var/test/test.xml
/usr/local/coreseek/bin/indexer -c etc/csft.conf --all
/usr/local/coreseek/bin/search -c etc/csft.conf 网络搜索
# Create the directory that will hold the Sphinx index-building script:
mkdir -p /data/sh/other

设置Sphinx更新索引bash脚本

vi /data/sh/other/sphinx_update_index.sh
内容如下:
#!/bin/bash
# Rebuild all Sphinx (Coreseek) indexes, then swap the data directory and
# restart searchd.
#
# The build uses a derived config (${CONFFILE}.2) in which every occurrence of
# "var/data/" is rewritten to "var/data2/". After a successful build,
# var/data2 replaces var/data and searchd is started with the original config.

# Abort on the first failing command. Without this, a failed sed or indexer
# run would still kill searchd and delete the live index directory.
set -e

CONFFILE=/usr/local/coreseek/etc/sphinx_index.conf

# Generate the derived config. Quoting the expression yields the same sed
# program the unquoted, backslash-escaped form produced.
/bin/sed 's#var/data/#var/data2/#g' "${CONFFILE}" > "${CONFFILE}.2"
mkdir -p /usr/local/coreseek/var/data2

# Exactly one of the two indexer lines below is active; swap the comment
# marker to switch to the --rotate variant.
#/usr/local/coreseek/bin/indexer --config ${CONFFILE}.2 --all --rotate
/usr/local/coreseek/bin/indexer --config ${CONFFILE}.2 --all

# Stop searchd before its index directory is replaced. pkill exits non-zero
# when no searchd process exists (e.g. on the first run); that is not an
# error here, so it must not trip `set -e`.
pkill -9 searchd || true
sleep 4

# Replace the live index directory with the freshly built one.
/bin/rm -rf /usr/local/coreseek/var/data/
/bin/mv /usr/local/coreseek/var/data2/ /usr/local/coreseek/var/data/
sleep 2

/usr/local/coreseek/bin/searchd --config "${CONFFILE}"
记得设置执行权限
chmod 755 /data/sh/other/sphinx_update_index.sh

配置Sphinx索引源参数配置

################################### PHPCMS ############################################
# Data source definition: documents are fetched from a MySQL database.
source cc_phpcms
{
    type = mysql
    # Connection parameters — placeholders to be replaced per deployment.
    sql_host = 172.26.11.75  # change this to your actual setting
    sql_user = phpcms  # change this to your actual setting
    sql_pass = 123456   # change this to your actual setting
    sql_db = phpcms   # change this to your actual setting
    sql_port= 3306  # change this to your actual setting
    # Pre-queries: turn off the query cache for this session and set the
    # client/connection character sets to gbk with results returned as utf8.
    sql_query_pre = SET SESSION query_cache_type=OFF
    sql_query_pre = SET character_set_client = 'gbk'
    sql_query_pre = SET character_set_connection ='gbk'
    sql_query_pre = SET character_set_results ='utf8'
    # Main fetch query; the columns listed under sql_attr_* below are taken from it.
    sql_query = SELECT `id`,`catid`,`typeid`,`title`,`status`,`updatetime` from `i_news` # change this to your actual setting
    # NOTE(review): no sql_query_range is defined in this source, so
    # sql_range_step presumably has no effect — confirm.
    sql_range_step          = 1000
    sql_attr_timestamp      = updatetime
    sql_attr_uint           = catid
    sql_attr_uint           = typeid
    sql_attr_uint           = status
    # No post-query is configured (left empty).
    sql_query_post  =
    sql_ranged_throttle= 0
}
# Index definition built from the cc_phpcms source above.
index cc_phpcms
{
    source   = cc_phpcms
    # NOTE(review): sphinx_update_index.sh only rewrites paths containing
    # "var/data/"; this path does not, so it is identical in the generated
    # ".2" config — confirm that is intended.
    path   = /dev/shm/cc_phpcms   # author's note: a good place for it, since /dev/shm is Linux's in-memory area
    docinfo   = extern
    mlock   = 0
    enable_star            = 1
    morphology   = none
    stopwords   =
    min_word_len  = 1
    # The next two settings point at the mmseg install directory and select the
    # zh_cn.utf-8 charset type; the original author flags both as important.
    charset_dictpath = /usr/local/mmseg3/etc/   # note this setting
    charset_type        = zh_cn.utf-8           # note this setting
    html_strip = 1
    html_remove_elements = style, script
    html_index_attrs = img=alt,title; a=title;
}
#################################### SETTING ############################################
# Settings for the indexer program.
indexer
{
    mem_limit   = 300M
}
# Settings for the searchd daemon. Log files and the pid file all live under
# /usr/local/coreseek/var/log/.
searchd
{
    # The address/listen alternatives below are left commented out; only
    # `port` is active.
    # address    = 0.0.0.0
    #listen                  = 3312
    #listen                  = 9312
    #listen                  = 9306:mysql41
    port    = 3312
    log     = /usr/local/coreseek/var/log/searchd.log
    query_log   = /usr/local/coreseek/var/log/query.log
    read_timeout  = 5
    max_children  = 30
    pid_file   = /usr/local/coreseek/var/log/searchd.pid
    max_matches   = 1000
    seamless_rotate  = 1
}

接下来实现数据源支持:让Sphinx支持MySQL数据源

# Install the development packages used by the MySQL-enabled rebuild.
yum -y install mysql-devel libxml2-devel expat-devel
cd /data/src/coreseek-4.1-beta/csft-4.1/
# Discard the earlier build output, then reconfigure with --with-mysql
# and reinstall into the same prefix.
make clean
sh buildconf.sh
 ./configure --prefix=/usr/local/coreseek  --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql
make && make install
cd ../
注意:如果出现错误提示:"ERROR: cannot find MySQL include files……To disable MySQL support, use --without-mysql option.",可按照如下方法处理:请找到头文件 mysql.h 所在的目录,一般是 /usr/local/mysql/include,请替换为实际的目录;请找到库文件 libmysqlclient.a 所在的目录,一般是 /usr/local/mysql/lib,请替换为实际的目录;configure 参数加上:--with-mysql-includes=/usr/local/mysql/include --with-mysql-libs=/usr/local/mysql/lib,执行后,重新编译安装。 #跑sphinx服务脚本
/data/sh/other/sphinx_update_index.sh
如果一切正常,将会顺利看到创建索引的信息(原文此处为截图:使用Coreseek-4.1快速搭建Sphinx中文分词 Php-Mysql 全文检索 搜索引擎)。/data/sh/other/sphinx_update_index.sh 跑了一次后,请编辑该脚本,将其中的
vi /data/sh/other/sphinx_update_index.sh
#/usr/local/coreseek/bin/indexer --config ${CONFFILE}.2 --all --rotate
/usr/local/coreseek/bin/indexer --config ${CONFFILE}.2 --all
变成
/usr/local/coreseek/bin/indexer --config ${CONFFILE}.2 --all --rotate
#/usr/local/coreseek/bin/indexer --config ${CONFFILE}.2 --all
也就是将注释调换。这样以后就可以设定一个定时计划来运行 /data/sh/other/sphinx_update_index.sh 脚本;运行该脚本后,会自动用 --rotate 的方式重建索引,也就是说新增加的内容也会被索引到。

人气教程排行