Lucene4.7.2实例之创建索引

来源:互联网 时间:1970-01-01

最近学习Lucene,发现不同版本之间的API变化比较大。网上能找到的资料都比较旧,而下载最新版的Lucene后又发现JDK版本也必须升级。由于我的电脑环境是Windows 64位 + JDK 1.6,所以需要找与之匹配的版本。我发现Lucene 4.7.2之后的版本都要求JDK 1.7,因此选择Lucene 4.7.2来做实例。下面这个实例是我参考官方文档写的。




import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;


public class IndexFiles {
    public static void createIndex() throws IOException {
String docsPath = "D://lucene-test//testFile";
String indexPath = "D://lucene-test//index";
File docDir = new File(docsPath);
if (!docDir.exists() || !docDir.canRead()) {
   System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
}
Directory dir = FSDirectory.open(new File(indexPath));


Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);


IndexWriter writer = new IndexWriter(dir, iwc);
indexDocs(writer, docDir);
writer.close();
    }


    private static void indexDocs(IndexWriter writer, File file) {
// do not try to index files that cannot be read
if (file.canRead()) {
   if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
   for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]));
   }
}
   } else {


FileInputStream fis = null;
try {
   fis = new FileInputStream(file);


   Document doc = new Document();


   Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
   doc.add(pathField);
   doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
   doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));


   if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old
// document can be there):
System.out.println("adding " + file);
writer.addDocument(doc);
   } else {
// Existing index (an old copy of this document may have
// been indexed) so
// we use updateDocument instead to replace the old one
// matching the exact
// path, if present:
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.getPath()), doc);
   }


} catch (FileNotFoundException fnfe) {
   // checking if the file can be read doesn't help
   return;
} catch (UnsupportedEncodingException e) {
   e.printStackTrace();
} catch (IOException e) {
   e.printStackTrace();
} finally {
   try {
fis.close();
   } catch (IOException e) {
e.printStackTrace();
   }
}
   }
}
    }


    public static void main(String[] args) {
try {
   createIndex();
} catch (IOException e) {
   e.printStackTrace();
}
    }
}


相关阅读:
Top