天天看點

Lucene使用案例(包括索引的維護)

本篇部落格記錄使用Lucene的API來實作對索引的增(建立索引)、删(删除索引)、改(修改索引)、查(搜尋資料),以完善我之前的記事本系統為例,直接上核心代碼:

1、Lucene工具類

package com.ue.util;

import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Static helpers for opening, writing, searching and closing a Lucene index.
 *
 * <p>The factory methods that can hit an {@link IOException} print its stack trace and
 * return {@code null} instead of throwing, so callers must null-check the result.
 *
 * @author LiJun
 * @create 2019-07-02 18:23
 */
public class LuceneUtil {
    /**
     * Opens the file-system directory that holds the index files.
     *
     * @param path location of the index directory
     * @return the opened directory, or {@code null} if opening failed with an {@link IOException}
     */
    public static Directory getDirectory(String path) {
        try {
            return FSDirectory.open(Paths.get(path));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a directory that keeps the index in memory.
     *
     * @return a new {@link RAMDirectory}
     */
    public static Directory getRAMDirectory() {
        return new RAMDirectory();
    }

    /**
     * Opens a reader over the index stored in the given directory.
     *
     * @param directory directory containing the index
     * @return the opened reader, or {@code null} if opening failed with an {@link IOException}
     */
    public static DirectoryReader getDirectoryReader(Directory directory) {
        try {
            return DirectoryReader.open(directory);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a searcher on top of an already opened reader.
     *
     * @param reader reader to search
     * @return a new {@link IndexSearcher} bound to {@code reader}
     */
    public static IndexSearcher getIndexSearcher(DirectoryReader reader) {
        return new IndexSearcher(reader);
    }

    /**
     * Opens an index writer in {@code CREATE_OR_APPEND} mode with commit-on-close enabled.
     *
     * @param directory directory to write the index into
     * @param analyzer  analyzer used to tokenize indexed text
     * @return the opened writer, or {@code null} if opening failed with an {@link IOException}
     */
    public static IndexWriter getIndexWriter(Directory directory, Analyzer analyzer) {
        try {
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            // Pending changes are committed automatically when the writer is closed.
            config.setCommitOnClose(true);
            return new IndexWriter(directory, config);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Closes the index writer and then the directory. Either argument may be {@code null}.
     * A failure to close one resource is reported and does not prevent closing the other.
     *
     * @param indexWriter writer to close, may be {@code null}
     * @param directory   directory to close, may be {@code null}
     */
    public static void close(IndexWriter indexWriter, Directory directory) {
        closeAndReport(indexWriter);
        closeAndReport(directory);
    }

    /**
     * Closes the index reader and then the directory. Either argument may be {@code null}.
     * A failure to close one resource is reported and does not prevent closing the other.
     *
     * @param reader    reader to close, may be {@code null}
     * @param directory directory to close, may be {@code null}
     */
    public static void close(DirectoryReader reader, Directory directory) {
        closeAndReport(reader);
        closeAndReport(directory);
    }

    /**
     * Builds a highlighter that wraps matched terms in a red {@code <span>}.
     *
     * @param query     query whose terms should be highlighted
     * @param fieldName field name handed to the {@link QueryTermScorer}
     * @return a highlighter using a {@link SimpleFragmenter} configured with 200
     */
    public static Highlighter getHighlighter(Query query, String fieldName) {
        Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Scorer fragmentScorer = new QueryTermScorer(query, fieldName);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(200));
        return highlighter;
    }

    /**
     * Closes a resource if it is non-null; an {@link IOException} is printed rather than
     * silently discarded, matching the error reporting used by the factory methods.
     */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
           

2、對某一表進行索引操作的幫助類

package com.ue.component;

import com.ue.model.Diary;
import com.ue.service.DiaryService;
import com.ue.util.DateUtil;
import com.ue.util.LuceneUtil;
import com.ue.util.StringUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.store.Directory;
import org.jsoup.Jsoup;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.io.File;
import java.io.StringReader;
import java.util.*;

import static com.ue.config.CommonConfig.INDEXPATH;

/**
 * Maintains the Lucene index of diary entries stored under {@code INDEXPATH}:
 * add, delete, update, full rebuild and keyword search.
 *
 * <p>Every write operation opens its own directory and writer and always closes both,
 * including when the operation fails part-way.
 *
 * @author LiJun
 * @create  2019-07-02 21:16
 */
@Component
public class DiaryIndex {
    @Autowired
    private DiaryService diaryService;

    /**
     * Adds one diary to the index (called when a diary is published).
     *
     * @param diary diary to index
     * @throws Exception if the index cannot be opened, written or committed
     */
    public void addIndex(Diary diary) throws Exception {
        try (Directory directory = LuceneUtil.getDirectory(INDEXPATH);
             IndexWriter writer = openWriter(directory)) {
            writer.addDocument(buildDocument(
                    String.valueOf(diary.getDid()),
                    diary.getTitle(),
                    DateUtil.formatDate(diary.getReleaseDate(), "yyyy-MM-dd"),
                    StringUtils.html2Text(diary.getContent())));
        }
    }

    /**
     * Removes the index entry of the given diary.
     *
     * @param did diary id, matched against the stored {@code did} field
     * @throws Exception if the index cannot be opened, written or committed
     */
    public void deleteIndex(String did) throws Exception {
        try (Directory directory = LuceneUtil.getDirectory(INDEXPATH);
             IndexWriter writer = openWriter(directory)) {
            writer.deleteDocuments(new Term("did", did));
            // Force the deletion to be merged away instead of only being marked.
            writer.forceMergeDeletes();
            writer.commit();
        }
    }

    /**
     * Replaces the index entry of the given diary, keyed by its {@code did}.
     *
     * @param diary diary whose index entry should be refreshed
     * @throws Exception if the index cannot be opened, written or committed
     */
    public void updateIndex(Diary diary) throws Exception {
        try (Directory directory = LuceneUtil.getDirectory(INDEXPATH);
             IndexWriter writer = openWriter(directory)) {
            String did = String.valueOf(diary.getDid());
            Document doc = buildDocument(
                    did,
                    diary.getTitle(),
                    DateUtil.formatDate((Date) diary.getReleaseDate(), "yyyy-MM-dd"),
                    StringUtils.html2Text(diary.getContent()));
            writer.updateDocument(new Term("did", did), doc);
        }
    }

    /**
     * Rebuilds the whole index from the diaries returned by the diary service.
     * Intended for when the index files have been lost.
     *
     * @throws Exception if loading the diaries or rebuilding the index fails
     */
    public void indexDiarys() throws Exception {
        System.out.println("-------------------------開始生成索引-------------------------");
        // Load the rows before wiping anything, so a failing query cannot leave an empty index.
        List<Map<String, Object>> maps = diaryService.listPager(null, null);
        FileUtils.deleteDirectory(new File(INDEXPATH));
        try (Directory directory = LuceneUtil.getDirectory(INDEXPATH);
             IndexWriter indexWriter = openWriter(directory)) {
            for (Map<String, Object> diary : maps) {
                String contentText = Jsoup.parse((String) diary.get("content")).text();
                indexWriter.addDocument(buildDocument(
                        String.valueOf(diary.get("did")),
                        (String) diary.get("title"),
                        DateUtil.formatDate((Date) diary.get("releaseDate"), "yyyy-MM-dd"),
                        contentText));
            }
        }
        System.out.println("-------------------------結束生成索引-------------------------");
    }

    /**
     * Searches diaries whose title or content matches the keyword and returns at most
     * 100 hits with the matched terms highlighted.
     *
     * @param q keyword or query string; {@code null} or blank yields an empty result
     * @return matching diaries, never {@code null}; empty when the index cannot be read
     * @throws Exception if the query cannot be parsed or the search fails
     */
    public List<Diary> searchDiary(String q) throws Exception {
        List<Diary> diaryList = new ArrayList<>();
        if (StringUtils.isBlank(q)) {
            // The query parser cannot handle a missing or empty query string.
            return diaryList;
        }

        Directory directory = LuceneUtil.getDirectory(INDEXPATH);
        DirectoryReader reader = null;
        try {
            if (directory != null) {
                reader = LuceneUtil.getDirectoryReader(directory);
            }
            if (reader == null) {
                // Index missing or unreadable: LuceneUtil has already reported the cause.
                return diaryList;
            }
            IndexSearcher searcher = LuceneUtil.getIndexSearcher(reader);
            SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

            // Match the keyword against either field.
            Query titleQuery = new QueryParser("title", analyzer).parse(q);
            Query contentQuery = new QueryParser("content", analyzer).parse(q);
            BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
            booleanQuery.add(titleQuery, BooleanClause.Occur.SHOULD);
            booleanQuery.add(contentQuery, BooleanClause.Occur.SHOULD);

            // The highlighter is built from the title query and reused for the content.
            Highlighter highlighter = LuceneUtil.getHighlighter(titleQuery, "title");
            TopDocs topDocs = searcher.search(booleanQuery.build(), 100);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                Diary diary = new Diary();
                diary.setDid(Integer.valueOf(doc.get("did")));
                diary.setReleaseDateStr(doc.get("releaseDate"));

                // Both fields end up inside highlighted HTML, so both are escaped first.
                String title = StringEscapeUtils.escapeHtml4(doc.get("title"));
                if (title != null) {
                    diary.setTitle(highlight(highlighter, analyzer, "title", title, title));
                }

                String content = StringEscapeUtils.escapeHtml4(doc.get("content"));
                if (content != null) {
                    // Without a highlighted fragment, fall back to the first 200 characters.
                    String preview = content.length() <= 200 ? content : content.substring(0, 200);
                    diary.setContent(highlight(highlighter, analyzer, "content", content, preview));
                }
                diaryList.add(diary);
            }
            return diaryList;
        } finally {
            LuceneUtil.close(reader, directory);
        }
    }

    /**
     * Opens a writer on the given directory, failing fast with a descriptive exception
     * instead of letting a {@code null} from {@link LuceneUtil} surface later.
     */
    private static IndexWriter openWriter(Directory directory) {
        if (directory == null) {
            throw new IllegalStateException("Cannot open index directory: " + INDEXPATH);
        }
        IndexWriter writer = LuceneUtil.getIndexWriter(directory, new SmartChineseAnalyzer());
        if (writer == null) {
            throw new IllegalStateException("Cannot open index writer for: " + INDEXPATH);
        }
        return writer;
    }

    /** Builds the stored document shared by add, update and rebuild. */
    private static Document buildDocument(String did, String title, String releaseDate, String content) {
        Document doc = new Document();
        doc.add(new StringField("did", did, Field.Store.YES));
        doc.add(new TextField("title", title, Field.Store.YES));
        doc.add(new StringField("releaseDate", releaseDate, Field.Store.YES));
        doc.add(new TextField("content", content, Field.Store.YES));
        return doc;
    }

    /** Returns the best highlighted fragment of {@code text}, or {@code fallback} if there is none. */
    private static String highlight(Highlighter highlighter, SmartChineseAnalyzer analyzer,
                                    String field, String text, String fallback) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokenStream, text);
        return StringUtils.isBlank(fragment) ? fallback : fragment;
    }
}
           

3、對應的controller層、service實作類代碼(因為我有部分邏輯是在service實作類裡進行處理的)

  • controller層:
/**
     * Saves a new diary or updates an existing one, then redirects to the main page.
     *
     * @param req   current request; receives the {@code view} attribute
     * @param diary diary data bound from the request
     * @return redirect to {@code /user/mainTemp}
     */
    @RequestMapping("/save")
    public String save(HttpServletRequest req,Diary diary) {
        diaryService.save(diary);
        // NOTE(review): this attribute is set right before a redirect — confirm it is still needed.
        req.setAttribute("view", "/diary/edit");
        final String redirectTarget = "redirect:/user/mainTemp";
        return redirectTarget;
    }

    /**
     * Deletes the diary with the given id, then redirects to the main page.
     *
     * @param req current request (not used by this handler)
     * @param did id of the diary to delete, taken from the URL path
     * @return redirect to {@code /user/mainTemp}
     */
    @RequestMapping("/del/{did}")
    public String del(HttpServletRequest req,@PathVariable("did")Integer did) {
        diaryService.del(did);
        final String redirectTarget = "redirect:/user/mainTemp";
        return redirectTarget;
    }

    /**
     * Regenerates the complete diary index.
     *
     * @return a map whose {@code success} entry is {@code true} when the rebuild finished
     *         and {@code false} when it failed
     */
    @ResponseBody
    @RequestMapping("/indexDiarys")
    public Map indexDiarys(){
        Map<String, Object> map = new HashMap<>();
        boolean success = false;
        try {
            diaryIndex.indexDiarys();
            success = true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Always report the outcome so the caller never has to interpret a missing key.
        map.put("success", success);
        return map;
    }

    /**
     * Searches diaries by keyword and renders one page of the result.
     *
     * @param q       keyword to search for (optional request parameter)
     * @param request current request, used to initialise paging
     * @return the {@code mainTemp} view populated with the current page of hits
     * @throws Exception if the index search fails
     */
    @RequestMapping("/q")
    public ModelAndView search(@RequestParam(value="q",required=false) String q, HttpServletRequest request)throws Exception{
        PageBean pageBean = new PageBean();
        pageBean.setRequest(request);
        ModelAndView mav=new ModelAndView("mainTemp");
        mav.addObject("view", "/diary/result");

        // Fetch every hit, then cut out the requested page.
        List<Diary> diaryList=diaryIndex.searchDiary(q);

        int total = diaryList.size();
        pageBean.setTotal(total);
        // Clamp both bounds so a page beyond the last hit yields an empty page
        // instead of an IllegalArgumentException from subList.
        int endIndex = Math.max(0, Math.min(pageBean.getPage() * pageBean.getRows(), total));
        int startIndex = Math.max(0, Math.min(pageBean.getStartIndex(), endIndex));
        mav.addObject("diaryList", diaryList.subList(startIndex, endIndex));
        // Data for the previous/next page links.
        mav.addObject("q", q);
        mav.addObject("resultTotal", pageBean.getTotal());
        mav.addObject("pageCode", PageUtil.createPageCode(pageBean));
        return mav;
    }
           
  • service實作類:
@Override
    public int save(Diary diary) {
        // Inserts the diary when it has no id yet, otherwise updates it, and keeps the
        // search index in step. Returns the row count reported by the mapper, or 0 on failure.
        try {
            // Decide before the insert; the mapper may assign the id while inserting.
            boolean isNew = diary.getDid() == null || diary.getDid() == 0;
            // Write the database first so the index never gets ahead of the stored data.
            int rows = isNew ? this.diaryMapper.add(diary) : this.diaryMapper.update(diary);
            if (rows > 0) {
                // NOTE(review): indexing a new diary relies on add() having set its id — confirm.
                if (isNew) {
                    diaryIndex.addIndex(diary);
                } else {
                    diaryIndex.updateIndex(diary);
                }
            }
            return rows;
        }catch (Exception e){
            e.printStackTrace();
            return 0;
        }
    }

    @Override
    public int del(Integer did) {
        // Delete the row first: if that throws, the index entry is still intact and the
        // diary stays searchable.
        int rows = this.diaryMapper.del(did);
        try {
            diaryIndex.deleteIndex(String.valueOf(did));
        } catch (Exception e) {
            // An index failure is reported but does not change the database result.
            e.printStackTrace();
        }
        return rows;
    }
           

4、實作後的效果

這是我在之前的記事本系統的基礎上将Lucene整合進來的,之前的效果:

https://blog.csdn.net/weixin_42687829/article/details/90550886

整合Lucene後的效果:

Lucene使用案例(包括索引的維護)

5、過程中需要注意的問題

Lucene使用案例(包括索引的維護)
Lucene使用案例(包括索引的維護)

繼續閱讀