【第一节】Lucene5教程经典源代码

javazx · 发表于 2016-6-20 14:48:01

1、lucene.apache.org
2、5.3.1版本
3、源码：
（0）pom：
<dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>5.3.1</version>
</dependency>

<dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>5.3.1</version>
</dependency>

<dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>5.3.1</version>
</dependency>

（1）写入：
import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the regular files found directly inside a data directory.
 *
 * <p>Each file becomes one document with three fields: {@code contents} (tokenized file
 * body, not stored), {@code fileName} and {@code fullPath} (both stored).
 *
 * <p>Callers must invoke {@link #close()} when done so that the writer and the underlying
 * index directory are released.
 */
public class Indexer {

    /** Index storage; kept so it can be closed together with the writer. */
    private Directory dir;

    /** Writer used to add documents to the index. */
    private IndexWriter writer;

    /**
     * Opens (or creates) the index located at {@code indexDir} and prepares a writer for it.
     *
     * @param indexDir path of the directory that holds the index
     * @throws Exception if the directory cannot be opened or the writer cannot be created
     */
    public Indexer(String indexDir) throws Exception {
        dir = FSDirectory.open(Paths.get(indexDir));
        try {
            Analyzer analyzer = new StandardAnalyzer(); // standard analyzer
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(dir, iwc);
        } catch (Exception e) {
            // The writer could not be created: release the directory before propagating,
            // otherwise nobody holds a reference that could close it later.
            try {
                dir.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Closes the index writer and then the index directory.
     *
     * @throws Exception if closing the writer or the directory fails
     */
    public void close() throws Exception {
        try {
            writer.close();
        } finally {
            // IndexWriter.close() does not close the Directory it was given.
            dir.close();
        }
    }

    /**
     * Indexes every regular file located directly inside {@code dataDir}.
     * Sub-directories are skipped (they are not descended into).
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the number of documents in the index as reported by the writer
     * @throws IllegalArgumentException if {@code dataDir} is not a listable directory
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IllegalArgumentException("Not a readable directory: " + dataDir);
        }
        for (File f : files) {
            // A FileReader cannot be opened on a directory, so only regular files are indexed.
            if (f.isFile()) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Indexes a single file: logs its path, builds the document and adds it to the index.
     *
     * @param f file to index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件："+f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Builds the Lucene document for a file and populates its fields.
     *
     * @param f file to describe
     * @return a document with {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws Exception if the file cannot be opened or its canonical path cannot be resolved
     */
    private Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset; confirm that
        // this matches the encoding of the data files.
        doc.add(new TextField("contents", new FileReader(f)));
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Demo entry point: indexes the files under a fixed data directory and prints how many
     * documents the index holds and how long the run took.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene";
        String dataDir = "D:\\lucene\\data";
        Indexer indexer = null;
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        try {
            indexer = new Indexer(indexDir);
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // indexer stays null when the constructor itself failed.
            if (indexer != null) {
                try {
                    indexer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        long end = System.currentTimeMillis();
        System.out.println("索引："+numIndexed+" 个文件花费了"+(end-start)+" 毫秒");
    }
}

（2）查询：
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query against the {@code contents} field of a Lucene index and prints the
 * stored {@code fullPath} of each hit.
 */
public class Searcher {

    /**
     * Searches the index at {@code indexDir} for {@code q} and prints the results.
     *
     * <p>The query string is parsed with the classic query parser using a standard analyzer.
     * At most 10 hits are retrieved. The index directory and reader are always closed before
     * this method returns, including when parsing or searching fails.
     *
     * @param indexDir path of the directory that holds the index
     * @param q query string in classic query-parser syntax
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *         or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources closes the reader first, then the directory, even on failure.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher is = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(); // standard analyzer
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            // Only the search call itself is timed; opening the index and parsing are excluded.
            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 "+q+" ，总共花费"+(end-start)+"毫秒"+"查询到"+hits.totalHits+"个记录");

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                System.out.println(doc.get("fullPath"));
            }
        }
    }

    /**
     * Demo entry point: searches a fixed index directory for a fixed query string.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene";
        String q = "Zygmunt Saloni";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

背包走的缓慢 · 发表于 2016-11-20 16:39:58

不错好资源可以用

		自动登录	找回密码
密码			立即注册

【第一节】Lucene5教程经典源代码

相关帖子

宣传达人

突出贡献

优秀版主

荣誉管理

论坛元老

【第一节】Lucene5教程 经典源代码

相关帖子

宣传达人

突出贡献

优秀版主

荣誉管理

论坛元老

【第一节】Lucene5教程经典源代码