最近在學習 Lucene,發現網上的 Lucene 範例都不太令人滿意,所以自己寫了一個;如果哪裡有問題,歡迎指出。
建立索引
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds a small demo Lucene index containing four sample address documents.
 */
public class AddIndex {

    /** Sample addresses; each document's stored "id" is its 1-based position in this array. */
    private static final String[] SAMPLE_ADDRESSES = {
        "中國四川省成都市金牛區青羊東二路",
        "中國四川省成都市金牛區永陵路",
        "中國四川省成都市金牛區一環路西三段",
        "中國四川省成都市金牛區營門口路"
    };

    /**
     * Writes the sample address documents into the index stored at {@code path}.
     *
     * <p>An {@link IOException} raised while indexing is printed to standard output and not
     * rethrown. The writer and the directory are closed whether or not indexing succeeds.
     *
     * @param path file-system location where the index is stored
     */
    public void addIndex(String path) {
        try {
            Directory fsDir = FSDirectory.open(new File(path));
            try {
                // The analyzer used here must be the same one used at query time,
                // otherwise searches will not find the indexed data.
                Analyzer analyzer = new IKAnalyzer();
                IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
                // NOTE(review): no OpenMode is set, so with Lucene's default (CREATE_OR_APPEND)
                // calling this repeatedly on the same path appends the same four documents
                // again -- confirm whether that is intended.
                IndexWriter writer = new IndexWriter(fsDir, conf);
                try {
                    System.out.println("~~~建立索引~~~");
                    for (int i = 0; i < SAMPLE_ADDRESSES.length; i++) {
                        writer.addDocument(
                                newAddressDocument(String.valueOf(i + 1), SAMPLE_ADDRESSES[i]));
                    }
                    writer.forceMerge(1);
                } finally {
                    // Close even when indexing fails so the writer is not leaked.
                    writer.close();
                }
                System.out.println("~~~索引建立完成~~~");
            } finally {
                fsDir.close();
            }
        } catch (IOException e) {
            System.out.println(e.toString());
        }
    }

    /** Creates a document with a stored, non-indexed "id" and a stored, analyzed "address". */
    private static Document newAddressDocument(String id, String address) {
        Document document = new Document();
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NO));
        document.add(new Field("address", address, Field.Store.YES, Field.Index.ANALYZED));
        return document;
    }
}
查詢資料
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Demo driver: builds the sample address index with {@code AddIndex} and then runs one
 * query against it, printing every hit together with its score.
 */
public class TestLucene {
    /** File-system location of the index. */
    private static final String INDEX_PATH = "e:\\lucene\\addressStore";
    /** Same location as {@link #INDEX_PATH}, as a {@link File} for {@code FSDirectory.open}. */
    private static final File INDEX_DIR = new File(INDEX_PATH);
    /** Query text used by {@link #main(String[])}. */
    private static final String SAMPLE_QUERY = "中國四川省成都市金牛區營門口路";
    /** Name of the document field that is searched. */
    private static final String SEARCH_FIELD = "address";

    /** Analyzer used to tokenize the query; AddIndex also builds the index with an IKAnalyzer. */
    Analyzer analyzer = new IKAnalyzer();

    /**
     * Builds the index, then searches it for the sample query.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new AddIndex().addIndex(INDEX_PATH);
        try {
            new TestLucene().search(SAMPLE_QUERY);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code keyword} as a query on the "address" field, runs it against the index and
     * prints the parsed query followed by the address and score of up to ten hits.
     *
     * <p>The searcher, the index reader and the directory are all closed before this method
     * returns, whether it completes normally or throws.
     *
     * @param keyword query text; it is handed to {@link QueryParser#parse(String)} unchanged
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if {@code keyword} cannot be parsed as a query
     */
    public void search(String keyword) throws IOException, ParseException {
        FSDirectory directory = FSDirectory.open(INDEX_DIR);
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher isearcher = new IndexSearcher(reader);
                try {
                    // Only the analyzer has to be supplied; Lucene tokenizes the query itself.
                    QueryParser parser = new QueryParser(Version.LUCENE_35, SEARCH_FIELD, analyzer);
                    Query query = parser.parse(keyword);
                    System.out.println(query.toString());

                    // Run the search; 10 limits the result to the first ten hits.
                    TopDocs topDocs = isearcher.search(query, 10);
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        System.out.println("----------------分割線----------------------");
                        Document hitDoc = isearcher.doc(scoreDoc.doc);
                        float score = scoreDoc.score;
                        String address = hitDoc.get("address");
                        System.out.println("address:" + address+"\nsocre:"+score);
                        // Prints the scoring details; remove if not needed.
                        //int docId = scoreDoc.doc;
                        //Explanation exp = isearcher.explain(query,docId);
                        //System.out.println(exp.toString());
                    }
                } finally {
                    isearcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader,
                // so it has to be closed explicitly.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}
查詢結果
~~~建立索引~~~
~~~索引建立完成~~~
address:中國 address:國四 address:四川省 address:四川 address:四 address:省成 address:成都市 address:成都 address:都市 address:金牛區 address:金牛 address:營 address:門口 address:路
----------------分割線----------------------
address:中國四川省成都市金牛區營門口路
socre:0.9141956
----------------分割線----------------------
address:中國四川省成都市金牛區永陵路
socre:0.44761625
----------------分割線----------------------
address:中國四川省成都市金牛區青羊東二路
socre:0.39166427
----------------分割線----------------------
address:中國四川省成都市金牛區一環路西三段
socre:0.31202385