聊聊Doug Cutting
![](https://img.laitimes.com/img/__Qf2AjLwojIjJCLyojI0JCLiAzNfRHLGZkRGZkRfJ3bs92YsYTMfVmepNHLwhmMjBnSXllesJTYx40MMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnLkFWYxImZwUmMygDZ2cjYhRzN2QzNjFWM2EDZjZTNhlzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
ElasticSearch概述
ES和solr的差别
ElasticSearch安装
ES核心概念
IK分词器插件
Rest风格说明
关于文档的基本操作
集成SpringBoot
package com.xiaofan;
import com.alibaba.fastjson.JSON;
import com.xiaofan.pojo.User;
import org.apache.http.entity.ContentType;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.TimeUnit;
@SpringBootTest
class EsApiApplicationTests {
public static final String INDEX = "xiaofan_test_index";
@Autowired
@Qualifier(value = "restHighLevelClient")
private RestHighLevelClient client;
// 创建索引
@Test
void testCreateIndex() throws IOException {
// 1. 创建索引请求
CreateIndexRequest request = new CreateIndexRequest(INDEX);
// 2. 客户端执行请求, IndicesClient,请求后获得响应
CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
System.out.println(createIndexResponse);
}
// 测试索引存在
@Test
void testExistsIndex() throws IOException {
GetIndexRequest request = new GetIndexRequest(INDEX);
boolean exists = client.indices().exists(request, RequestOptions.DEFAULT);
System.out.println(exists);
}
// 删除索引
@Test
void testDeleteIndex() throws IOException {
DeleteIndexRequest request = new DeleteIndexRequest(INDEX);
AcknowledgedResponse acknowledgedResponse = client.indices().delete(request, RequestOptions.DEFAULT);
System.out.println(acknowledgedResponse.isAcknowledged());
}
// 添加文档
@Test
void testAddDocument() throws IOException {
User user = new User("狂神说", 28);
IndexRequest request = new IndexRequest(INDEX);
// 规则 PUT /index/_doc/1
request.id("1");
request.timeout(TimeValue.timeValueSeconds(1));
// 将数据放入请求 json
request.source(JSON.toJSONString(user), XContentType.JSON);
IndexResponse response = client.index(request, RequestOptions.DEFAULT);
System.out.println(response.toString());
System.out.println(response.status());
}
// 获取文档 判断是否存在 GET /index/_doc/1
@Test
void testIsExists() throws IOException {
GetRequest request = new GetRequest(INDEX, "1");
// 不获取返回的 _source 的上下文了
request.fetchSourceContext(new FetchSourceContext(false));
request.storedFields("_none_");
boolean exists = client.exists(request, RequestOptions.DEFAULT);
System.out.println(exists);
}
// 获取文档
/**
* 返回结果:
* {"age":28,"name":"狂神说"}
* {"_index":"xiaofan_test_index","_type":"_doc","_id":"1","_version":1,"_seq_no":0,"_primary_term":1,"found":true,"_source":{"age":28,"name":"狂神说"}}
*/
@Test
void testGetDocument() throws IOException {
GetRequest request = new GetRequest(INDEX, "1");
GetResponse response = client.get(request, RequestOptions.DEFAULT);
System.out.println(response.getSourceAsString());
System.out.println(response);
}
// 更新文档
@Test
void testUpdateDocument() throws IOException {
UpdateRequest request = new UpdateRequest(INDEX, "1");
request.timeout("1s");
User user = new User("小范说Java", 18);
request.doc(JSON.toJSONString(user), XContentType.JSON);
UpdateResponse updateResponse = client.update(request, RequestOptions.DEFAULT);
System.out.println(updateResponse);
}
// 删除文档
@Test
void testDeleteDocument() throws IOException {
DeleteRequest request = new DeleteRequest(INDEX, "1");
request.timeout("1s");
DeleteResponse deleteResponse = client.delete(request, RequestOptions.DEFAULT);
System.out.println(deleteResponse);
}
// 批量插入数据(修改,删除类似操作)
@Test
void testBulkRequest() throws IOException {
BulkRequest request = new BulkRequest();
request.timeout("10s");
ArrayList<User> users = new ArrayList<>();
users.add(new User("kuangshen1", 21));
users.add(new User("kuangshen2", 22));
users.add(new User("kuangshen3", 23));
users.add(new User("xiaofan1", 18));
users.add(new User("xiaofan2", 19));
// 批处理请求, 修改,删除,只要在这里修改相应的请求就可以
for (int i = 0; i < users.size(); i++) {
request.add(new IndexRequest(INDEX)
.id(String.valueOf(i + 1))
.source(JSON.toJSONString(users.get(i)), XContentType.JSON));
}
BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
//是否失败,返回false表示成功
System.out.println(bulkResponse.hasFailures());
}
// 查询文档
@Test
void testSearch() throws IOException {
SearchRequest searchRequest = new SearchRequest(INDEX);
// 构建搜索条件
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
// 查询条件, 可以使用QueryBuilders工具类实现
// QueryBuilders.termQuery 精确
// QueryBuilders.matchLLQuery() 匹配所有
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "kuangshen1");
// MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
sourceBuilder.query(termQueryBuilder);
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(JSON.toJSON(searchResponse.getHits()));
System.out.println("======================================");
for (SearchHit documentFields : searchResponse.getHits().getHits()) {
System.out.println(documentFields.getSourceAsMap());
}
}
}
添加依赖
<properties>
<java.version>1.8</java.version>
<elasticsearch.version>7.2.1</elasticsearch.version>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.73</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
</dependencies>
配置信息
server.port=9090
# thymeleaf
spring.thymeleaf.cache=false
es配置
/src/main/java/com/kuang/kuangshenesjd/config/ElasticSearchClientConfig.java
```java
package com.kuang.kuangshenesjd.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
// 狂神的Spring两步骤
// 1.找对象
// 2.放到spring中
// 3.如果是springboot,先分析源码
// xxxx AutoConfiguration xxxProperfile
@Configuration
public class ElasticSearchClientConfig {
@Bean
public RestHighLevelClient restHighLevelClient() {
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("127.0.0.1", 9200, "http"))
);
return client;
}
}
# 爬取数据
数据问题?数据库中获取,消息队列中获取,都可以称为数据源。也可用爬虫解决。(当前只需要少量数据进行测试,所以项目中需先进行数据爬取解析)
## 爬取数据:
import com.kuang.kuangshenesjd.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
@Component
public class HtmlParseUtil {
public static void main(String[] args) throws IOException {
new HtmlParseUtil().parseJD("java").forEach(System.out::println);
}
public ArrayList<Content> parseJD(String keywords) throws IOException {
// 获取请求,前提需要联网,ajax不能获取到!
String url = "https://search.jd.com/Search?keyword=" + keywords + "&enc=utf-8";
// 解析网页.(Jsoup返回的Document就是页面对象)
Document document = Jsoup.parse(new URL(url), 30000);
// 所有你在js中可以使用的方法
Element element = document.getElementById("J_goodsList");
// 获取所有的li元素
Elements elements = element.getElementsByTag("li");
// 获取元素中的内容,el代表每一个li标签
ArrayList<Content> goodsList = new ArrayList<>();
for (Element el : elements) {
// 关于这种图片特别多的网站,都是延迟加载的
String image = el.getElementsByTag("img").eq(0).attr("src");
String price = el.getElementsByClass("p-price").eq(0).text();
String title = el.getElementsByClass("p-name").eq(0).text();
Content content = new Content();
content.setTitle(title);
content.setImg(image);
content.setPrice(price);
goodsList.add(content);
System.out.println("=========================");
System.out.println(image);
System.out.println(price);
System.out.println(title);
}
return goodsList;
}
}
server层实现业务编写–ES存储数据,获取ES数据
import com.alibaba.fastjson.JSON;
import com.kuang.kuangshenesjd.pojo.Content;
import com.kuang.kuangshenesjd.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
// 业务编写
@Service
public class ContentService {
@Autowired
private RestHighLevelClient restHighLevelClient;
// 1. 解析数据,放进es索引库中
public Boolean parseContent(String keywords) throws IOException {
ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
// 把查询的数据放到到es中
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("2m");
for (int i = 0; i < contents.size(); i++) {
bulkRequest.add(
new IndexRequest("jd_goods")
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
}
BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
return bulkResponse.hasFailures();
}
// 2.获取数据实现搜索功能
public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
if (pageNo < 1) {
pageNo = 1;
}
// 条件搜索
SearchRequest searchRequest = new SearchRequest("jd_goods");
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
// 分页
sourceBuilder.from(pageNo);
sourceBuilder.size(pageSize);
// 精准匹配
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
sourceBuilder.query(termQueryBuilder);
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
// 执行搜索
searchRequest.source(sourceBuilder);
SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// 解析结果
ArrayList<Map<String, Object>> list = new ArrayList<>();
for (SearchHit hit : response.getHits().getHits()) {
list.add(hit.getSourceAsMap());
}
return list;
}
// 3.获取数据,高亮
public List<Map<String, Object>> searchPageHighlighter(String keyword, int pageNo, int pageSize) throws IOException {
if (pageNo < 1) {
pageNo = 1;
}
// 条件搜索
SearchRequest searchRequest = new SearchRequest("jd_goods");
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
// 分页
sourceBuilder.from(pageNo);
sourceBuilder.size(pageSize);
// 精准匹配
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
sourceBuilder.query(termQueryBuilder);
sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
// 高亮
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("title");
highlightBuilder.requireFieldMatch(false);// 关闭多个高亮显示
highlightBuilder.preTags("<span style='color:red'>");
highlightBuilder.postTags("</span>");
sourceBuilder.highlighter(highlightBuilder);
// 执行搜索
searchRequest.source(sourceBuilder);
SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// 解析结果
ArrayList<Map<String, Object>> list = new ArrayList<>();
for (SearchHit hit : response.getHits().getHits()) {
// 解析高亮的字段
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
HighlightField title = highlightFields.get("title");
Map<String, Object> map = hit.getSourceAsMap();// 原来的结果
// 解析高亮的字段
if (title != null) {
// 将高亮的字段替换成原来没有高亮的字段
Text[] fragments = title.fragments();
String newTitle = "";
for (Text text : fragments) {
newTitle += text;
}
map.put("title", newTitle);
}
list.add(map);
}
return list;
}
}
controller层负责调用业务层(server)
通过接口的方式爬取数据,并存储在es中
package com.kuang.kuangshenesjd.controller;
import com.kuang.kuangshenesjd.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
import java.util.List;
import java.util.Map;
@Controller
public class ContentController {
@Autowired
private ContentService contentService;
@GetMapping("/parse/{keyword}")
@ResponseBody
public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
return contentService.parseContent(keyword);
}
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public String search(@PathVariable("keyword") String keyword,
@PathVariable("pageNo") int pageNo,
@PathVariable("pageSize") int pageSize, Model model) throws IOException {
if (pageNo == 0) {
pageNo = 1;
}
System.out.println(keyword + pageNo + pageSize);
List<Map<String, Object>> list = contentService.searchPageHighlighter(keyword, pageNo, pageSize);
model.addAttribute("list", list);
return "index";
}
}