第二章:Lucene 构建索引(新增、查询、更新、删除)

来源:互联网 时间:1970-01-01


package lucene2;import java.io.IOException;import junit.framework.TestCase;import org.apache.lucene.analysis.WhitespaceAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.util.Version;import org.junit.Test;/** * 构建索引(基本的新增、查询、更新、删除) * @author zhangwx * */public class IndexTest extends TestCase{ protected String[] ids = {"1","2"}; protected String[] unidexed = {"Netherlands","Italy"}; protected String[] unstored ={"Amsterdam has lots of bridges","Venice has lots of canals"}; protected String[] text = {"Amsterdam","Venice"}; private Directory directory;//索引目录 @Override protected void setUp() throws Exception { directory = new RAMDirectory();//内存索引 IndexWriter writer = getWriter(); for(int i=0;i<ids.length;i++){ Document doc = new Document(); doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("country",unidexed[i],Field.Store.YES,Field.Index.NO)); doc.add(new Field("contents",unstored[i],Field.Store.NO,Field.Index.ANALYZED)); doc.add(new Field("city",text[i],Field.Store.YES,Field.Index.ANALYZED)); writer.addDocument(doc); } writer.commit(); writer.close();//记住:writer用完随时关闭(可能造成writer.lock异常) } /** * 创建IndexWriter * @return IndexWriter * @throws Exception */ private IndexWriter getWriter() throws Exception{ IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35)); return new IndexWriter(directory,config); } protected int getHitCount(String filedName,String searchString) throws Exception{ 
IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); Term term = new Term(filedName,searchString); Query query = new TermQuery(term); int hitCount = hitCount(searcher, query); searcher.close(); reader.close(); return hitCount; } public static int hitCount(IndexSearcher searcher,Query query) throws IOException{ TopDocs search = searcher.search(query,1); return search.totalHits; } @Test public void testIndexWriter() throws Exception{ IndexWriter writer = getWriter(); assertEquals(ids.length, writer.numDocs());//确认写入的文档数 writer.close(); } @Test public void testIndexReader() throws Exception{ IndexReader reader = IndexReader.open(directory); assertEquals(ids.length, reader.maxDoc());//确认读取的最大文档数 assertEquals(ids.length, reader.numDocs());//确认有效的文档数 reader.close(); } @Test public void testIndexReader2() throws Exception{ int hitCount = getHitCount("contents","lots"); assertEquals(ids.length, hitCount); System.out.println(hitCount); } /** * 从索引中删除文档 优化操作前(未执行writer.optimize()之前被删除的文档仍然存在索引中只是状态被标记为已删除) * @throws Exception */ @Test public void testDeleteBeforeOptimize() throws Exception{ IndexWriter writer = getWriter(); assertEquals(2, writer.numDocs());//确认索引中的2个文档 writer.deleteDocuments(new Term("id","1"));//删除id=1的文档 writer.commit(); assertTrue(writer.hasDeletions());//确认被标记为删除 (是否存在被标记为删除的文档) assertEquals(2, writer.maxDoc());//确认删除后仍然有2个文档(一个文档被标记为删除,一个未删除) assertEquals(1, writer.numDocs());//确认剩余一个文档 writer.close(); } /** * 从索引中删除文档 优化操作后 (执行writer.optimize()后被删除的文档将会消失) * @throws Exception */ @Test public void testDeleteAfterOptimize() throws Exception{ IndexWriter writer = getWriter(); assertEquals(2, writer.numDocs());//删除前确认索引中两个文档 writer.deleteDocuments(new Term("id","1"));//删除id=1的文档 writer.optimize();//执行优化操作 writer.commit(); assertFalse(writer.hasDeletions());//确认是否存在被标记为删除的文档 assertEquals(1, writer.maxDoc());//确认文档总数 assertEquals(1, writer.numDocs());//确认可使用文档数 writer.close(); } /** * 
更新索引(原理:先调用deleteDocuments(term) 再调用addDocument() 即先删除原有文档再添加) * 本例中用新文档来替换id为1的旧文档。 * @throws Exception */ @Test public void testUpdate() throws Exception{ assertEquals(1, getHitCount("city", "Amsterdam"));//更新前查询city=Amsterdam,确认存在 IndexWriter writer = getWriter(); Document doc = new Document(); doc.add(new Field("id","1",Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("country","Netherlands",Field.Store.YES,Field.Index.NO)); doc.add(new Field("contents","Den Haag has a lot of musenums",Field.Store.NO,Field.Index.ANALYZED)); doc.add(new Field("city","Haag",Field.Store.YES,Field.Index.ANALYZED)); writer.updateDocument(new Term("id","1"), doc);//更新文档(先删除id=1的旧文档,再添加新doc) writer.close(); assertEquals(0, getHitCount("city", "Amsterdam"));//确认更新后city=Amsterdam记录不存在 assertEquals(1, getHitCount("city", "Haag"));//确认city=Den Haag的新闻档已添加 }}



相关阅读:
Top