聊聊Doug Cutting
ElasticSearch概述
ES和solr的差别
ElasticSearch安裝
ES核心概念
IK分詞器插件
Rest風格說明
關于文檔的基本操作
整合SpringBoot
package com.xiaofan;
import com.alibaba.fastjson.JSON;
import com.xiaofan.pojo.User;
import org.apache.http.entity.ContentType;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.TimeUnit;
@SpringBootTest
class EsApiApplicationTests {

    /** Name of the index that every test in this class operates on. */
    public static final String INDEX = "xiaofan_test_index";

    @Autowired
    @Qualifier(value = "restHighLevelClient")
    private RestHighLevelClient client;

    /** Creates the test index and prints the response. */
    @Test
    void testCreateIndex() throws IOException {
        CreateIndexRequest createRequest = new CreateIndexRequest(INDEX);
        // Index-level operations are issued through the client returned by indices().
        CreateIndexResponse created = client.indices().create(createRequest, RequestOptions.DEFAULT);
        System.out.println(created);
    }

    /** Prints whether the test index exists. */
    @Test
    void testExistsIndex() throws IOException {
        boolean indexPresent = client.indices().exists(new GetIndexRequest(INDEX), RequestOptions.DEFAULT);
        System.out.println(indexPresent);
    }

    /** Deletes the test index and prints whether the deletion was acknowledged. */
    @Test
    void testDeleteIndex() throws IOException {
        AcknowledgedResponse deleted =
                client.indices().delete(new DeleteIndexRequest(INDEX), RequestOptions.DEFAULT);
        System.out.println(deleted.isAcknowledged());
    }

    /** Indexes one user document under id "1" (equivalent to PUT /index/_doc/1). */
    @Test
    void testAddDocument() throws IOException {
        // Serialize the entity to JSON and use it as the document source.
        String userJson = JSON.toJSONString(new User("狂神說", 28));
        IndexRequest indexRequest = new IndexRequest(INDEX)
                .id("1")
                .source(userJson, XContentType.JSON);
        indexRequest.timeout(TimeValue.timeValueSeconds(1));

        IndexResponse indexed = client.index(indexRequest, RequestOptions.DEFAULT);
        System.out.println(indexed.toString());
        System.out.println(indexed.status());
    }

    /** Prints whether document "1" exists (GET /index/_doc/1) without fetching its content. */
    @Test
    void testIsExists() throws IOException {
        // Only existence matters here, so skip _source and stored fields.
        GetRequest existsRequest = new GetRequest(INDEX, "1")
                .fetchSourceContext(new FetchSourceContext(false))
                .storedFields("_none_");
        System.out.println(client.exists(existsRequest, RequestOptions.DEFAULT));
    }

    /**
     * Fetches document "1" and prints its source followed by the full response.
     *
     * Sample output:
     * {"age":28,"name":"狂神說"}
     * {"_index":"xiaofan_test_index","_type":"_doc","_id":"1","_version":1,"_seq_no":0,"_primary_term":1,"found":true,"_source":{"age":28,"name":"狂神說"}}
     */
    @Test
    void testGetDocument() throws IOException {
        GetResponse fetched = client.get(new GetRequest(INDEX, "1"), RequestOptions.DEFAULT);
        System.out.println(fetched.getSourceAsString());
        System.out.println(fetched);
    }

    /** Updates document "1" with a new user payload and prints the response. */
    @Test
    void testUpdateDocument() throws IOException {
        String replacementJson = JSON.toJSONString(new User("小範說Java", 18));
        UpdateRequest updateRequest = new UpdateRequest(INDEX, "1");
        updateRequest.doc(replacementJson, XContentType.JSON);
        updateRequest.timeout("1s");

        UpdateResponse updated = client.update(updateRequest, RequestOptions.DEFAULT);
        System.out.println(updated);
    }

    /** Deletes document "1" and prints the response. */
    @Test
    void testDeleteDocument() throws IOException {
        DeleteRequest deleteRequest = new DeleteRequest(INDEX, "1");
        deleteRequest.timeout("1s");
        DeleteResponse deleted = client.delete(deleteRequest, RequestOptions.DEFAULT);
        System.out.println(deleted);
    }

    /** Bulk-indexes five users; bulk updates and deletes follow the same pattern. */
    @Test
    void testBulkRequest() throws IOException {
        ArrayList<User> users = new ArrayList<>();
        users.add(new User("kuangshen1", 21));
        users.add(new User("kuangshen2", 22));
        users.add(new User("kuangshen3", 23));
        users.add(new User("xiaofan1", 18));
        users.add(new User("xiaofan2", 19));

        BulkRequest bulk = new BulkRequest();
        bulk.timeout("10s");
        // To batch updates or deletes instead, swap the request type added here.
        // Document ids are assigned sequentially starting at 1.
        int nextId = 1;
        for (User user : users) {
            bulk.add(new IndexRequest(INDEX)
                    .id(String.valueOf(nextId++))
                    .source(JSON.toJSONString(user), XContentType.JSON));
        }

        BulkResponse bulkResult = client.bulk(bulk, RequestOptions.DEFAULT);
        // hasFailures() returning false means every item succeeded.
        System.out.println(bulkResult.hasFailures());
    }

    /** Runs a term query on the "name" field and prints the hits. */
    @Test
    void testSearch() throws IOException {
        // Queries are built with the QueryBuilders utility class:
        //   QueryBuilders.termQuery(...)   - exact term match
        //   QueryBuilders.matchAllQuery()  - match every document
        SearchSourceBuilder searchSource = new SearchSourceBuilder()
                .query(QueryBuilders.termQuery("name", "kuangshen1"))
                .timeout(new TimeValue(60, TimeUnit.SECONDS));
        SearchRequest searchRequest = new SearchRequest(INDEX).source(searchSource);

        SearchResponse found = client.search(searchRequest, RequestOptions.DEFAULT);
        System.out.println(JSON.toJSON(found.getHits()));
        System.out.println("======================================");
        SearchHit[] hits = found.getHits().getHits();
        for (SearchHit hit : hits) {
            System.out.println(hit.getSourceAsMap());
        }
    }
}
添加依賴
<properties>
<!-- Java version used by the build -->
<java.version>1.8</java.version>
<!-- Elasticsearch client version; presumably overrides the version managed by the parent POM. TODO confirm it matches the server version -->
<elasticsearch.version>7.2.1</elasticsearch.version>
</properties>
<dependencies>
<!-- fastjson: serializes entities to JSON (JSON.toJSONString) before they are sent to Elasticsearch -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.73</version>
</dependency>
<!-- jsoup: fetches and parses the HTML search-result page in HtmlParseUtil -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
</dependencies>
配置資訊
# Port the web server listens on
server.port=9090
# Thymeleaf: template caching is turned off
spring.thymeleaf.cache=false
es配置
/src/main/java/com/kuang/kuangshenesjd/config/ElasticSearchClientConfig.java
```java
package com.kuang.kuangshenesjd.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
// Steps for integrating a component with Spring:
// 1. Find the object that is needed.
// 2. Register it in the Spring container.
// 3. With Spring Boot, read the matching source first:
//    xxxAutoConfiguration / xxxProperties
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a {@link RestHighLevelClient} that talks to a single node at
     * {@code http://127.0.0.1:9200}.
     *
     * @return the client bean, registered under the name {@code restHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
```
# 爬取資料
資料從哪裡來?可以從資料庫取得,也可以從消息隊列取得,這些都可以稱為資料源;此外也可以用爬蟲抓取。(目前只需要少量資料進行測試,所以本項目先爬取並解析網頁資料。)
## 爬取資料:
import com.kuang.kuangshenesjd.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
/**
 * Scrapes product listings from the JD search-result page.
 */
@Component
public class HtmlParseUtil {

    /** Manual smoke test: scrapes results for "java" and prints each parsed item. */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJD("java").forEach(System.out::println);
    }

    /**
     * Fetches the JD search page for the given keywords and extracts one {@code Content}
     * (title, image URL, price) per {@code <li>} inside the {@code J_goodsList} element.
     *
     * <p>Requires network access. Content loaded by the page through AJAX is not visible
     * to this parser.
     *
     * @param keywords search keywords; URL-encoded as UTF-8 before being placed in the query string
     * @return the parsed items; empty when the page has no {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keywords} is {@code null}
     * @throws IOException if the page cannot be fetched
     */
    public ArrayList<Content> parseJD(String keywords) throws IOException {
        if (keywords == null) {
            throw new IllegalArgumentException("keywords must not be null");
        }
        // Encode the keywords so that spaces, '&', '#' and non-ASCII text (e.g. Chinese)
        // cannot corrupt the query string.
        String encodedKeywords = java.net.URLEncoder.encode(keywords, "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeywords + "&enc=utf-8";

        // Jsoup returns a Document representing the page, queried much like the browser DOM.
        Document document = Jsoup.parse(new URL(url), 30000);

        ArrayList<Content> goodsList = new ArrayList<>();
        Element goodsContainer = document.getElementById("J_goodsList");
        if (goodsContainer == null) {
            // The result container is missing, so there is nothing to parse.
            return goodsList;
        }

        // Each <li> inside the container is one product entry.
        Elements items = goodsContainer.getElementsByTag("li");
        for (Element item : items) {
            // NOTE(review): image-heavy sites usually lazy-load images, so "src" may be
            // empty here and the real URL may live in another attribute - confirm.
            String image = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(image);
            content.setPrice(price);
            goodsList.add(content);

            // Debug output of each parsed entry.
            System.out.println("=========================");
            System.out.println(image);
            System.out.println(price);
            System.out.println(title);
        }
        return goodsList;
    }
}
service層實作業務編寫–ES存儲資料,擷取ES資料
import com.alibaba.fastjson.JSON;
import com.kuang.kuangshenesjd.pojo.Content;
import com.kuang.kuangshenesjd.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
// 業務編寫
/**
 * Business logic: scrapes product data into Elasticsearch and searches it.
 */
@Service
public class ContentService {

    /** Index that holds the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    /** Document field that is searched and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * 1. Scrapes products for the given keywords and bulk-indexes them into {@code jd_goods}.
     *
     * @param keywords search keywords passed to {@code HtmlParseUtil.parseJD}
     * @return {@code true} when every scraped document was indexed without failure;
     *         {@code false} when nothing was scraped or any bulk item failed
     * @throws IOException if scraping or the bulk request fails at the I/O level
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // A bulk request without any action fails validation, so do not send one.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }
        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        // hasFailures() is true when something went wrong, so success is its negation.
        return !bulkResponse.hasFailures();
    }

    /**
     * 2. Searches {@code jd_goods} with a term query on the title field.
     *
     * @param keyword  term to match against the title field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the {@code _source} map of every hit on the requested page
     * @throws IOException if the search request fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        SearchSourceBuilder sourceBuilder = buildTitleQuery(keyword, pageNo, pageSize);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : executeSearch(sourceBuilder)) {
            results.add(hit.getSourceAsMap());
        }
        return results;
    }

    /**
     * 3. Same search as {@link #searchPage}, but the title of each hit is replaced by its
     * highlighted version, with matches wrapped in {@code <span style='color:red'>}.
     *
     * @param keyword  term to match against the title field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the {@code _source} map of every hit, with {@code title} highlighted where available
     * @throws IOException if the search request fails
     */
    public List<Map<String, Object>> searchPageHighlighter(String keyword, int pageNo, int pageSize) throws IOException {
        SearchSourceBuilder sourceBuilder = buildTitleQuery(keyword, pageNo, pageSize);

        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : executeSearch(sourceBuilder)) {
            Map<String, Object> source = hit.getSourceAsMap();
            HighlightField highlightedTitle = hit.getHighlightFields().get(TITLE_FIELD);
            if (highlightedTitle != null) {
                // Replace the plain title with the concatenated highlighted fragments.
                StringBuilder newTitle = new StringBuilder();
                for (Text fragment : highlightedTitle.fragments()) {
                    newTitle.append(fragment);
                }
                source.put(TITLE_FIELD, newTitle.toString());
            }
            results.add(source);
        }
        return results;
    }

    /** Builds the paged term query on the title field shared by both search methods. */
    private static SearchSourceBuilder buildTitleQuery(String keyword, int pageNo, int pageSize) {
        int page = Math.max(pageNo, 1);
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // "from" is a document offset, not a page number: page 1 starts at offset 0.
        sourceBuilder.from(Math.multiplyExact(page - 1, pageSize));
        sourceBuilder.size(pageSize);
        sourceBuilder.query(QueryBuilders.termQuery(TITLE_FIELD, keyword));
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        return sourceBuilder;
    }

    /** Runs the given search against the products index and returns the raw hits. */
    private SearchHit[] executeSearch(SearchSourceBuilder sourceBuilder) throws IOException {
        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(sourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        return response.getHits().getHits();
    }
}
controller層負責調用業務層(service)
通過接口的方式爬取資料,并存儲在es中
package com.kuang.kuangshenesjd.controller;
import com.kuang.kuangshenesjd.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Web endpoints that trigger scraping and render search results.
 */
@Controller
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Scrapes products for the keyword and stores them in Elasticsearch.
     *
     * @param keyword search keyword taken from the URL path
     * @return the value returned by {@code ContentService.parseContent}, written to the response body
     * @throws IOException if scraping or indexing fails
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean outcome = contentService.parseContent(keyword);
        return outcome;
    }

    /**
     * Runs the highlighted search and renders the {@code index} view with the hits
     * stored in the model under {@code list}.
     *
     * @param keyword  search keyword taken from the URL path
     * @param pageNo   page number; 0 is replaced by 1
     * @param pageSize number of hits per page
     * @param model    view model that receives the result list
     * @return the view name {@code "index"}
     * @throws IOException if the search request fails
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public String search(@PathVariable("keyword") String keyword,
                         @PathVariable("pageNo") int pageNo,
                         @PathVariable("pageSize") int pageSize, Model model) throws IOException {
        int page = (pageNo == 0) ? 1 : pageNo;
        // Debug trace of the incoming request parameters.
        System.out.println(keyword + page + pageSize);
        List<Map<String, Object>> hits = contentService.searchPageHighlighter(keyword, page, pageSize);
        model.addAttribute("list", hits);
        return "index";
    }
}