1. Introduction
ES uses the standard analyzer by default; it also ships with other built-in analyzers such as simple, whitespace, and the language-specific analyzers.
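The built-in analyzers can be tried out with the _analyze API; for example, the default standard analyzer (the sample text here is arbitrary):
GET _analyze
{
  "analyzer": "standard",
  "text": "hello world"
}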
2. Install and configure the Chinese IK analyzer (git, maven, and unzip must be installed first)
git clone https://github.com/medcl/elasticsearch-analysis-ik.git
cd elasticsearch-analysis-ik
# check out the tag matching your ES version (v6.4.0 is assumed throughout)
git checkout v6.4.0
mvn package
mkdir /usr/local/elasticsearch/plugins/ik
mv target/releases/elasticsearch-analysis-ik-6.4.0.zip /usr/local/elasticsearch/plugins/ik
cd /usr/local/elasticsearch/plugins/ik
unzip elasticsearch-analysis-ik-6.4.0.zip
rm -f elasticsearch-analysis-ik-6.4.0.zip
chown -R es:gp /usr/local/elasticsearch/plugins/ik
# restart Elasticsearch (as the es user) from its home directory so the plugin is loaded
cd /usr/local/elasticsearch
./bin/elasticsearch -d
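To confirm the plugin was picked up after the restart, list the loaded plugins (the IK entry typically shows up as analysis-ik, depending on the version):
GET _cat/plugins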
3. Test the Chinese analyzer
# ik_max_word: split the text into as many words as possible
GET _analyze
{
  "analyzer": "ik_max_word",
  "text": "阿里巴巴"
}
# ik_smart: split the text into as few words as possible
GET _analyze
{
  "analyzer":"ik_smart",
  "text":"阿里巴巴"
}
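In practice, a common pattern is to index with ik_max_word and search with ik_smart; a minimal mapping sketch (my_index, the _doc type, and the content field are placeholder names):
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "content": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_smart"
        }
      }
    }
  }
}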
4. Install and configure the Pinyin analyzer
git clone https://github.com/medcl/elasticsearch-analysis-pinyin.git
cd elasticsearch-analysis-pinyin
# again, check out the tag matching your ES version (v6.4.0 assumed)
git checkout v6.4.0
mvn package
mkdir /usr/local/elasticsearch/plugins/pinyin
mv target/releases/elasticsearch-analysis-pinyin-6.4.0.zip /usr/local/elasticsearch/plugins/pinyin
cd /usr/local/elasticsearch/plugins/pinyin
unzip elasticsearch-analysis-pinyin-6.4.0.zip
rm -f elasticsearch-analysis-pinyin-6.4.0.zip
chown -R es:gp /usr/local/elasticsearch/plugins/pinyin
# restart Elasticsearch (as the es user) from its home directory so the plugin is loaded
cd /usr/local/elasticsearch
./bin/elasticsearch -d
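As in step 2, GET _cat/plugins can be used to confirm that the pinyin plugin (typically listed as analysis-pinyin) was loaded after the restart.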
5. Test the Pinyin analyzer
GET _analyze
{
  "text": "阿里巴巴",
  "analyzer": "pinyin"
}
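Besides the ready-made pinyin analyzer, the plugin also provides a pinyin tokenizer that can be wired into a custom analyzer; a minimal sketch (pinyin_index and my_pinyin are placeholder names, and the tokenizer's options are documented in the plugin's README):
PUT pinyin_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_pinyin": {
          "tokenizer": "pinyin"
        }
      }
    }
  }
}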