天天看點

lucene3.5分詞+搜尋

最近在做 Lucene,發現網上的 Lucene 範例都不太令人滿意,所以自己寫了一個;如果哪裡有問題,歡迎指出來。

建立索引

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Builds a small demo Lucene index containing four street addresses.
 *
 * <p>Each document has a stored-only {@code id} field and a stored, analyzed
 * {@code address} field tokenized with {@link IKAnalyzer}.
 */
public class AddIndex {

	/**
	 * Writes the four sample address documents into the index at {@code path}.
	 *
	 * <p>An {@link IOException} is reported on standard output and not
	 * rethrown. The writer and the directory are released whether or not
	 * indexing succeeds.
	 *
	 * <p>NOTE(review): the writer config does not set an open mode, so running
	 * this repeatedly against the same path presumably appends duplicate
	 * documents - confirm against the IndexWriterConfig defaults.
	 *
	 * @param path file-system location where the index is stored
	 */
	public  void addIndex(String path) {
		try{
			Directory fsDir = FSDirectory.open(new File(path));
			try {
				// The analyzer used here must be the same one used at query time,
				// otherwise the query terms will not match the indexed terms.
				Analyzer analyzer = new IKAnalyzer();
				IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
				IndexWriter writer = new IndexWriter(fsDir, conf);
				try {
					System.out.println("~~~建立索引~~~");
					writer.addDocument(newAddressDocument("1", "中國四川省成都市金牛區青羊東二路"));
					writer.addDocument(newAddressDocument("2", "中國四川省成都市金牛區永陵路"));
					writer.addDocument(newAddressDocument("3", "中國四川省成都市金牛區一環路西三段"));
					writer.addDocument(newAddressDocument("4", "中國四川省成都市金牛區營門口路"));

					writer.forceMerge(1);
				} finally {
					// Always close the writer so a failed add/merge does not leave
					// the writer (and its lock on the index) open.
					writer.close();
				}
			} finally {
				fsDir.close();
			}
			// Only reached when indexing and closing both succeeded.
			System.out.println("~~~索引建立完成~~~");
		}catch (IOException e) {
			System.out.println(e.toString());
		} 
	}

	/** Creates a document with a stored, non-indexed id and a stored, analyzed address. */
	private static Document newAddressDocument(String id, String address) {
		Document document = new Document();
		document.add(new Field("id", id, Field.Store.YES, Field.Index.NO));
		document.add(new Field("address", address, Field.Store.YES, Field.Index.ANALYZED));
		return document;
	}
}
           

查詢資料

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demo driver: builds the sample address index with {@code AddIndex} and then
 * runs a query against the {@code address} field, printing each hit and its score.
 */
public class TestLucene {
	private static String path = "e:\\lucene\\addressStore";
	// Must be the same analyzer type that was used when the index was built.
	Analyzer analyzer = new IKAnalyzer();
	private static File dataFile = new File(path);
	private static String str = "中國四川省成都市金牛區營門口路";
	private static String fiels = "address";

	/**
	 * Builds the index at the configured path, then searches it for the sample address.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		new AddIndex().addIndex(path);
		try {
			new TestLucene().search(str);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Parses {@code keyword} against the address field, prints the parsed query,
	 * and prints the address and score of up to the top 10 hits.
	 *
	 * <p>The directory, reader and searcher are closed even when parsing or
	 * searching fails.
	 *
	 * @param keyword query text; it is handed to {@link QueryParser} as-is
	 * @throws IOException if the index cannot be opened or read
	 * @throws ParseException if {@code keyword} cannot be parsed as a query
	 */
	public void search(String keyword) throws IOException, ParseException {
		FSDirectory directory = FSDirectory.open(dataFile);
		try {
			IndexReader reader = IndexReader.open(directory);
			try {
				IndexSearcher isearcher = new IndexSearcher(reader);
				try {
					// Only the analyzer needs to be passed in; the parser tokenizes the keyword itself.
					QueryParser parser = new QueryParser(Version.LUCENE_35, fiels,analyzer);
					Query query = parser.parse(keyword);
					System.out.println(query.toString());

					// Run the search and keep the top 10 hits.
					TopDocs topDocs = isearcher.search(query, 10);
					ScoreDoc[] hits = topDocs.scoreDocs;
					for (ScoreDoc scoreDoc : hits) {
						System.out.println("----------------分割線----------------------");
						Document hitDoc = isearcher.doc(scoreDoc.doc);
						float i = scoreDoc.score;
						String address = hitDoc.get("address");
						System.out.println("address:" + address+"\nsocre:"+i);
						// To print the scoring details, uncomment the lines below:
						//int docId = scoreDoc.doc;  
						//Explanation exp = isearcher.explain(query,docId); 
						//System.out.println(exp.toString());
					}
				} finally {
					isearcher.close();
				}
			} finally {
				// A searcher constructed from an existing reader does not close that
				// reader, so it has to be closed explicitly here.
				reader.close();
			}
		} finally {
			directory.close();
		}
	}
}
           

 查詢結果

~~~建立索引~~~

~~~索引建立完成~~~

address:中國 address:國四 address:四川省 address:四川 address:四 address:省成 address:成都市 address:成都 address:都市 address:金牛區 address:金牛 address:營 address:門口 address:路

----------------分割線----------------------

address:中國四川省成都市金牛區營門口路

socre:0.9141956

----------------分割線----------------------

address:中國四川省成都市金牛區永陵路

socre:0.44761625

----------------分割線----------------------

address:中國四川省成都市金牛區青羊東二路

socre:0.39166427

----------------分割線----------------------

address:中國四川省成都市金牛區一環路西三段

socre:0.31202385