天天看點

ES搜尋引擎Java用戶端API-QueryBuilder基本查詢

package com.xx.xx.services.xx.xx.es;

import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map.Entry;
import org.elasticsearch.action.ListenableActionFuture;
import org.elasticsearch.action.get.GetRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Walk-through of the Elasticsearch Java transport-client query API
 * ({@link QueryBuilders}), written as JUnit tests that run against a live cluster.
 *
 * <p>Each test builds one kind of query; most of them hand it to
 * {@link #searchFunction(QueryBuilder)}, which runs it against the {@code twitter}
 * index and prints the source fields of every hit.
 *
 * @Author: wrf
 * @Date: 2018/1/4
 */
public class EsTest {
    private TransportClient client;

    /**
     * Opens a transport client to the cluster before every test.
     */
    @Before
    public void testBefore() {
        Settings settings = Settings.settingsBuilder().put("cluster.name", "wenbronk_escluster").build();
        client = TransportClient.builder().settings(settings).build()
            .addTransportAddress(new InetSocketTransportAddress(new InetSocketAddress("192.168.50.37", 9300)));
        System.out.println("success to connect escluster");
    }

    /**
     * Closes the transport client after every test so that a fresh client per test
     * does not leak connections and threads.
     */
    @After
    public void testAfter() {
        if (client != null) {
            client.close();
            client = null;
        }
    }

    /**
     * Fetches a single document by index, type and id with the GET API and prints its source.
     */
    @Test
    public void testGet() {
        GetRequestBuilder requestBuilder = client.prepareGet("twitter", "tweet", "1");
        // get() is shorthand for execute().actionGet(); one round trip is enough.
        GetResponse response = requestBuilder.get();
        System.out.println(response.getSourceAsString());
    }

    /**
     * Basic QueryBuilder usage.
     * termQuery("key", obj)                          exact match on one value
     * termsQuery("key", obj1, obj2..)                exact match on any of several values
     * matchQuery("key", obj)                         match on a single field; the field name cannot use wildcards
     * multiMatchQuery("text", "field1", "field2"..)  match on several fields; field names may use wildcards
     * matchAllQuery()                                match every document
     */
    @Test
    public void testQueryBuilder() {
        // Alternatives - swap one in for matchAllQuery() below to try it:
//        QueryBuilder queryBuilder = QueryBuilders.termQuery("user", "kimchy");
//        QueryBuilder queryBuilder = QueryBuilders.termsQuery("user", "kimchy", "wenbronk", "vini");
        // termsQuery also accepts a collection. Pass the list itself, not the boolean
        // returned by List.add():
//        ArrayList<String> users = new ArrayList<String>();
//        users.add("kimchy");
//        QueryBuilder queryBuilder = QueryBuilders.termsQuery("user", users);
//        QueryBuilder queryBuilder = QueryBuilders.matchQuery("user", "kimchy");
//        QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery("kimchy", "user", "message", "gender");
        QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();
        searchFunction(queryBuilder);
    }

    /**
     * Compound (bool) query.
     * must(QueryBuilder)    : AND
     * mustNot(QueryBuilder) : NOT
     * should(QueryBuilder)  : OR
     */
    @Test
    public void testQueryBuilder2() {
        QueryBuilder queryBuilder = QueryBuilders.boolQuery()
            .must(QueryBuilders.termQuery("user", "kimchy"))
            .mustNot(QueryBuilders.termQuery("message", "nihao"))
            .should(QueryBuilders.termQuery("gender", "male"));
        searchFunction(queryBuilder);
    }

    /**
     * Query by document id only.
     * QueryBuilders.idsQuery(String...type).ids(Collection<String> ids)
     */
    @Test
    public void testIdsQuery() {
        QueryBuilder queryBuilder = QueryBuilders.idsQuery().ids("1");
        searchFunction(queryBuilder);
    }

    /**
     * Constant-score query: wraps another query and, per the original author's note,
     * does not compute relevance for the matches; the boost is set to 2.0 here.
     */
    @Test
    public void testConstantScoreQuery() {
        QueryBuilder queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("name", "kimchy"))
            .boost(2.0f);
        searchFunction(queryBuilder);
        // Filter variant (pre-2.0 API):
//        QueryBuilders.constantScoreQuery(FilterBuilders.termQuery("name", "kimchy")).boost(2.0f);
    }

    /**
     * disMax query.
     * Takes the union of the sub-query results; the score follows the highest
     * sub-query score. Widely used for multi-field search.
     */
    @Test
    public void testDisMaxQuery() {
        QueryBuilder queryBuilder = QueryBuilders.disMaxQuery()
            .add(QueryBuilders.termQuery("user", "kimch"))  // sub-query
            .add(QueryBuilders.termQuery("message", "hello"))
            .boost(1.3f)
            .tieBreaker(0.7f);
        searchFunction(queryBuilder);
    }

    /**
     * Fuzzy query. Wildcards cannot be used in the search value.
     */
    @Test
    public void testFuzzyQuery() {
        QueryBuilder queryBuilder = QueryBuilders.fuzzyQuery("user", "kimch");
        searchFunction(queryBuilder);
    }

    /**
     * Parent/child document query: selects parents that have a matching child of type "sonDoc".
     */
    @Test
    public void testChildQuery() {
        QueryBuilder queryBuilder = QueryBuilders.hasChildQuery("sonDoc", QueryBuilders.termQuery("name", "vini"));
        searchFunction(queryBuilder);
    }

    /**
     * moreLikeThisQuery: content-based recommendation, e.g. finding articles similar to a sentence.
     * {
     * "more_like_this" : {
     * "fields" : ["title", "content"],      // fields to match; defaults to _all when omitted
     * "like_text" : "text like this one",   // text to match against
     * }
     * }
     *
     * The options and defaults below are as listed by the original author; verify them
     * against the documentation of the Elasticsearch version in use.
     *
     * percent_terms_to_match: percentage of terms that must match, default 0.3
     *
     * min_term_freq: minimum occurrences of a term within a document; rarer terms are ignored, default 2
     *
     * max_query_terms: maximum number of terms allowed in one query, default 25
     *
     * stop_words: stop words, ignored when matching
     *
     * min_doc_freq: minimum number of documents a term must appear in; rarer terms are ignored, default unlimited
     *
     * max_doc_freq: maximum number of documents a term may appear in; more frequent terms are ignored, default unlimited
     *
     * min_word_len: minimum word length, default 0
     *
     * max_word_len: maximum word length, default unlimited
     *
     * boost_terms: term boost, default 1
     *
     * boost: query boost, default 1
     *
     * analyzer: analyzer to use; defaults to the analyzer configured for the field
     */
    @Test
    public void testMoreLikeThisQuery() {
        QueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("user")
            .like("kimchy");
//                            .minTermFreq(1)         // minimum number of occurrences
//                            .maxQueryTerms(12);     // maximum number of query terms
        searchFunction(queryBuilder);
    }

    /**
     * Prefix query: matches documents whose "user" field has a term starting with the given prefix.
     */
    @Test
    public void testPrefixQuery() {
        // Was a matchQuery, which does not exercise prefix matching at all.
        QueryBuilder queryBuilder = QueryBuilders.prefixQuery("user", "kim");
        searchFunction(queryBuilder);
    }

    /**
     * Query-string query: the query text is parsed with the query-string syntax.
     */
    @Test
    public void testQueryString() {
        QueryBuilder queryBuilder = QueryBuilders.queryStringQuery("+kimchy");
        searchFunction(queryBuilder);
    }

    /**
     * Range query.
     */
    @Test
    public void testRangeQuery() {
        QueryBuilder queryBuilder = QueryBuilders.rangeQuery("user")
            .from("kimchy")
            .to("wenbronk")
            .includeLower(true)     // include the lower bound
            .includeUpper(true);    // include the upper bound
        searchFunction(queryBuilder);
    }

    /**
     * Span queries. This test only constructs the builders; none of them is sent to the cluster.
     */
    @Test
    public void testSpanQueries() {
        // Span First
        QueryBuilder queryBuilder1 = QueryBuilders
            .spanFirstQuery(QueryBuilders.spanTermQuery("name", "葫蘆580娃"), 30000);     // max end position of the match

        // Span Near
        QueryBuilder queryBuilder2 = QueryBuilders.spanNearQuery()
            .clause(QueryBuilders.spanTermQuery("name", "葫蘆580娃")) // Span Term Queries
            .clause(QueryBuilders.spanTermQuery("name", "葫蘆3812娃"))
            .clause(QueryBuilders.spanTermQuery("name", "葫蘆7139娃"))
            .slop(30000)                                               // Slop factor
            .inOrder(false)
            .collectPayloads(false);

        // Span Not
        QueryBuilder queryBuilder3 = QueryBuilders.spanNotQuery()
            .include(QueryBuilders.spanTermQuery("name", "葫蘆580娃"))
            .exclude(QueryBuilders.spanTermQuery("home", "山西省太原市2552街道"));

        // Span Or
        QueryBuilder queryBuilder4 = QueryBuilders.spanOrQuery()
            .clause(QueryBuilders.spanTermQuery("name", "葫蘆580娃"))
            .clause(QueryBuilders.spanTermQuery("name", "葫蘆3812娃"))
            .clause(QueryBuilders.spanTermQuery("name", "葫蘆7139娃"));

        // Span Term
        QueryBuilder queryBuilder5 = QueryBuilders.spanTermQuery("name", "葫蘆580娃");
    }

    /**
     * Child query with an explicit score mode. Only constructs the builder; nothing is executed.
     */
    @Test
    public void testTopChildrenQuery() {
        QueryBuilders.hasChildQuery("tweet",
            QueryBuilders.termQuery("user", "kimchy"))
            .scoreMode("max");
    }

    /**
     * Wildcard query; supports *, which matches any character sequence including the empty one.
     * Avoid patterns that start with *: they scan a large amount of content and are slow.
     */
    @Test
    public void testWildCardQuery() {
        QueryBuilder queryBuilder = QueryBuilders.wildcardQuery("user", "ki*hy");
        searchFunction(queryBuilder);
    }

    /**
     * Nested query against embedded documents. Only constructs the builder; nothing is executed.
     */
    @Test
    public void testNestedQuery() {
        QueryBuilder queryBuilder = QueryBuilders.nestedQuery("location",
            QueryBuilders.boolQuery()
                .must(QueryBuilders.matchQuery("location.lat", 0.962590433140581))
                .must(QueryBuilders.rangeQuery("location.lon").lt(36.0000).gt(0.000)))
            .scoreMode("total");
    }

    /**
     * Indices query. Only constructs the builder; nothing is executed.
     */
    @Test
    public void testIndicesQueryBuilder() {
        QueryBuilder queryBuilder = QueryBuilders.indicesQuery(
            QueryBuilders.termQuery("user", "kimchy"), "index1", "index2")
            .noMatchQuery(QueryBuilders.termQuery("user", "kimchy"));
    }


    /**
     * Runs the query against the "twitter" index with the scroll API and prints every
     * source field of every hit as "key: value".
     *
     * <p>The initial search response already carries the first page of hits, so it is
     * printed before the next page is requested; scrolling stops at the first empty page.
     *
     * @param queryBuilder the query to execute
     */
    private void searchFunction(QueryBuilder queryBuilder) {
        TimeValue keepAlive = new TimeValue(60000);
        SearchResponse response = client.prepareSearch("twitter")
            .setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
            .setScroll(keepAlive)
            .setQuery(queryBuilder)
            .setSize(100).execute().actionGet();
        String scrollId = response.getScrollId();

        try {
            while (response.getHits().hits().length > 0) {
                for (SearchHit hit : response.getHits()) {
                    for (Entry<String, Object> field : hit.getSource().entrySet()) {
                        System.out.println(field.getKey() + ": " + field.getValue());
                    }
                }
                response = client.prepareSearchScroll(scrollId)
                    .setScroll(keepAlive).execute().actionGet();
                // Always continue with the id from the most recent response.
                scrollId = response.getScrollId();
            }
        } finally {
            // Release the server-side scroll context instead of waiting for the keep-alive to expire.
            if (scrollId != null) {
                client.prepareClearScroll().addScrollId(scrollId).execute().actionGet();
            }
        }
//        testResponse(response);
    }

    /**
     * Shows how to pick a search response apart: prints the total hit count and, for each
     * hit, its score and the "id", "title", "content" and "updatetime" source fields.
     *
     * @param response the search response to inspect; every hit must contain the four
     *                 source fields named above
     */
    public void testResponse(SearchResponse response) {
        // number of matching records
        long totalHits = response.getHits().totalHits();
        System.out.println("totalHits: " + totalHits);

        for (SearchHit searchHit : response.getHits()) {
            // relevance score
            float score = searchHit.getScore();
            // article id
            int id = Integer.parseInt(searchHit.getSource().get("id").toString());
            // title
            String title = searchHit.getSource().get("title").toString();
            // content
            String content = searchHit.getSource().get("content").toString();
            // article update time
            long updatetime = Long.parseLong(searchHit.getSource().get("updatetime").toString());

            System.out.println("score: " + score + ", id: " + id + ", title: " + title
                + ", content: " + content + ", updatetime: " + updatetime);
        }
    }

    /**
     * Runs a match-all search on the "blog" index with highlighting on the "user" field
     * and prints each hit's source, its highlight map and the highlighted fragments.
     */
    public void testHighLighted() {
        /*  Highlight setup for version 5.0 and later
         * client.#().#().highlighter(hBuilder).execute().actionGet();
        HighlightBuilder hBuilder = new HighlightBuilder();
        hBuilder.preTags("<h2>");
        hBuilder.postTags("</h2>");
        hBuilder.field("user");        // field to highlight
        */
        // add highlighting to the search request
        SearchResponse response = client.prepareSearch("blog")
            .setQuery(QueryBuilders.matchAllQuery())
            .addHighlightedField("user")        // field to highlight
            .setHighlighterPreTags("<h1>")
            .setHighlighterPostTags("</h1>")
            .execute().actionGet();

        // walk the results and read the highlighted fragments
        SearchHits searchHits = response.getHits();
        for (SearchHit hit : searchHits) {
            System.out.println("String方式列印文檔搜尋内容:");
            System.out.println(hit.getSourceAsString());
            System.out.println("Map方式列印高亮内容");
            System.out.println(hit.getHighlightFields());

            System.out.println("周遊高亮集合,列印高亮片段:");
            // Read the field that was actually highlighted ("user"), and only when the hit
            // carries a highlight for it; looking up another field would yield null.
            if (hit.getHighlightFields().containsKey("user")) {
                Text[] fragments = hit.getHighlightFields().get("user").getFragments();
                for (Text fragment : fragments) {
                    System.out.println(fragment.string());
                }
            }
        }
    }

}
      
es

繼續閱讀