hibernate search 和lucene结合使用实例

作者: fyg0072
发布时间:2015-07-08 18:29:04

以下的代码是根据api帮助文档作出的一个简单实例,在应用方面可以实现创建索引,搜索,过滤和高亮的功能。

整体的环境为:spring2.5.6,hibernate3.3.1,struts2.0.8,lucene2.4.1

第一步,首先是web.xml配置文件,由于使用了ssh2的架构,所以不得不在web.xml里配置一些东西

<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:schemaLocation="http://java.sun.com/xml/ns/javaee
 http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">

 <!-- Location of the Spring configuration files -->
 <context-param>
  <param-name>contextConfigLocation</param-name>
  <param-value>classpath*:spring/*.xml</param-value>
 </context-param>
 
 <!-- Hibernate Open Session in View filter: keeps the session open for *.action and *.jsp requests -->
 <filter>
  <filter-name>hibernateFilter</filter-name>
  <filter-class>
   org.springframework.orm.hibernate3.support.OpenSessionInViewFilter
  </filter-class>
 </filter>
 <filter-mapping>
  <filter-name>hibernateFilter</filter-name>
  <url-pattern>*.action</url-pattern>
  <dispatcher>REQUEST</dispatcher>
  <dispatcher>FORWARD</dispatcher>
 </filter-mapping>
 <filter-mapping>
  <filter-name>hibernateFilter</filter-name>
  <url-pattern>*.jsp</url-pattern>
  <dispatcher>REQUEST</dispatcher>
  <dispatcher>FORWARD</dispatcher>
 </filter-mapping>
 
 <!-- Boots the Spring application context from contextConfigLocation -->
 <listener>
  <listener-class>
   org.springframework.web.context.ContextLoaderListener
  </listener-class>
 </listener>

 <!-- Spring listener that flushes the JavaBeans Introspector cache to prevent memory leaks -->
 <listener>
  <listener-class>
   org.springframework.web.util.IntrospectorCleanupListener
  </listener-class>
 </listener>

 <!-- Struts 2 filters; package names are case-sensitive (org.apache, not org.Apache) -->
 <filter>
  <filter-name>struts-cleanup</filter-name>
  <filter-class>
   org.apache.struts2.dispatcher.ActionContextCleanUp
  </filter-class>
 </filter>
 <filter>
  <filter-name>struts2</filter-name>
  <filter-class>
   org.apache.struts2.dispatcher.FilterDispatcher
  </filter-class>
 </filter>

 <filter-mapping>
  <filter-name>struts-cleanup</filter-name>
  <url-pattern>/*</url-pattern>
 </filter-mapping>
 <filter-mapping>
  <filter-name>struts2</filter-name>
  <url-pattern>*.jsp</url-pattern>
  <dispatcher>REQUEST</dispatcher>
  <dispatcher>FORWARD</dispatcher>
 </filter-mapping>
 <filter-mapping>
  <filter-name>struts2</filter-name>
  <url-pattern>*.action</url-pattern>
  <dispatcher>REQUEST</dispatcher>
  <dispatcher>FORWARD</dispatcher>
 </filter-mapping>

 <!-- Spring's character-encoding filter: forces UTF-8 -->
 <!-- NOTE(review): filters are applied in the order of their filter-mapping elements, so this
      mapping runs after the Struts mappings above. If the request encoding must be set before
      Struts reads the parameters, move this filter-mapping to the top of the chain. -->
 <filter>
  <filter-name>encodingFilter</filter-name>
  <filter-class>
   org.springframework.web.filter.CharacterEncodingFilter
  </filter-class>
  <init-param>
   <param-name>encoding</param-name>
   <param-value>UTF-8</param-value>
  </init-param>
 </filter>
 <filter-mapping>
  <filter-name>encodingFilter</filter-name>
  <url-pattern>/*</url-pattern>
 </filter-mapping>

 <!-- Loaded at server start-up so the servlet can build the index files automatically -->

 <servlet>
  <servlet-name>CreateHibernateIndex</servlet-name>
  <servlet-class>com.test.servlet.CreateHibernateIndex</servlet-class>
  <load-on-startup>20</load-on-startup>
 </servlet>
 <servlet-mapping>
  <servlet-name>CreateHibernateIndex</servlet-name>
  <url-pattern>/servlet/CreateHibernateIndex</url-pattern>
 </servlet-mapping>

 <!-- Session timeout, in minutes -->
 <session-config>
  <session-timeout>20</session-timeout>
 </session-config>
 <!-- Default welcome page -->
 <welcome-file-list>
  <welcome-file>/index.jsp</welcome-file>
 </welcome-file-list>

</web-app>


第二步,配spring配置文件和hibernate文件

这是可以使用hibernate annotation注释的sessionFactory的属性配置的一部分,注意下面的2个使用索引的属性配置,提供文件索引的保存路径和读取方式(fsdirectory,文件索引,另外一种是ramdirectory,内存索引)
XML 代码


<!-- Directory provider for the default index: FSDirectoryProvider (file-system index) -->
<prop key="hibernate.search.default.directory_provider">org.hibernate.search.store.FSDirectoryProvider</prop>
<!-- Base directory of the index files, resolved from the property placeholder of the same name -->
<prop key="hibernate.search.default.indexBase">${hibernate.search.default.indexBase}</prop>


  
spring的配置文件没有什么特别的,和普通ssh配置没有什么两样

第三步配struts配置文件,由于也是普通配置,没有特别之处,就不贴出来了。

第四步,写实体类,由于采用hibernate search方法搜索,所以直接利用hibernate annotation注释去定义索引的一些配置信息。关于index的基本都属于索引的配置

Java 代码


package com.test.model;

import static javax.persistence.GenerationType.IDENTITY;

import java.util.Date;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;

import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.DateBridge;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Resolution;
import org.hibernate.search.annotations.Store;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Product entity.
 */
@Entity
@Table(name = "product", catalog = "hibernate_search_test")
@Indexed(index = "Product")
@Analyzer (impl = IKAnalyzer.class )
public class Product implements java.io.Serializable {

 // Fields

 /**
  *
  */
 private static final long serialVersionUID = -7005490272739421758L;
 private Integer id;
 private String proTitle;
 private String proDescn;
 private String proPrice;
 private Integer proType;
 private Date proTime;
 private String findResult;

 // Constructors

 /** default constructor */
 public Product() {
 }

 // Property accessors
 @Id
 @GeneratedValue(strategy = IDENTITY)
 @Column(name = "id")
 @DocumentId
 public Integer getId() {
  return this.id;
 }

 public void setId(Integer id) {
  this.id = id;
 }

 @Column(name = "pro_title")
 @Field(name = "pt", index = Index.TOKENIZED, store = Store.YES)
 public String getProTitle() {
  return this.proTitle;
 }

 public void setProTitle(String proTitle) {
  this.proTitle = proTitle;
 }

 @Column(name = "pro_descn")
 @Field(name = "pd", index = Index.TOKENIZED, store = Store.YES)
 public String getProDescn() {
  return this.proDescn;
 }

 public void setProDescn(String proDescn) {
  this.proDescn = proDescn;
 }

 @Column(name = "pro_price")
 public String getProPrice() {
  return this.proPrice;
 }

 public void setProPrice(String proPrice) {
  this.proPrice = proPrice;
 }

 @Column(name = "pro_type")
 public Integer getProType() {
  return this.proType;
 }

 public void setProType(Integer proType) {
  this.proType = proType;
 }

 @Temporal(TemporalType.DATE)
 @Column(name = "pro_time")
 @Field(name = "t", index = Index.UN_TOKENIZED, store = Store.YES)
 @DateBridge(resolution = Resolution.DAY)
 public Date getProTime() {
  return this.proTime;
 }

 public void setProTime(Date proTime) {
  this.proTime = proTime;
 }

//封装搜索出的高亮内容
 @Transient
 public String getFindResult() {
  return findResult;
 }

 public void setFindResult(String findResult) {
  this.findResult = findResult;
 }
}


第五步,写service方法,包括建索引,根据关键字用索引查,过滤,设置权重,高亮等等工作
Java代码

package com.test.service;

import java.io.File;
import java.io.StringReader;
import java.util.Date;
import java.util.List;

import javax.annotation.Resource;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springside.modules.orm.hibernate.HibernateDao;
import org.springside.modules.service.EntityManager;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.test.dao.ProductDao;
import com.test.model.Product;

@Transactional
@Service
public class ProductService extends EntityManager<Product, Integer> {
 @Resource(name = "productDao")
 private ProductDao productDao;

 @Override
 protected HibernateDao<Product, Integer> getEntityDao() {
  // TODO Auto-generated method stub
  return productDao;
 }

 @SuppressWarnings("unchecked")
 public List<Product> QueryByIndex(String words, String startDate,String endDate) throws Exception {
  FullTextSession fullTextSession = Search.createFullTextSession(productDao.getSession());

  /*Query IKQuery = IKQueryParser.parseMultiField(new String[] {
    "proTitle", "proDescn" }, new String[] { words, words },
    new BooleanClause.Occur[] { Occur.SHOULD, Occur.SHOULD });

  Query luceneQuery = MultiFieldQueryParser.parse(new String[] { words,
    words }, new String[] { "pro_title", "pro_descn" },
    new BooleanClause.Occur[] { Occur.SHOULD, Occur.SHOULD },
    new StandardAnalyzer());*/

  BooleanQuery bQuery = new BooleanQuery();
  Analyzer analyzer = new IKAnalyzer();
  //设置对域采用的某种分词器的QueryParser对象
  QueryParser qp;
  //设置了关键字的查询您对象
  //Query q;
 
  qp = new QueryParser(Version.LUCENE_CURRENT,"pt",analyzer);
  Query q1 = qp.parse(words);
  q1.setBoost(1.5f);
  bQuery.add(q1, Occur.SHOULD);
 
  qp = new QueryParser(Version.LUCENE_CURRENT,"pd",analyzer);
  Query q2 = qp.parse(words);
  q2.setBoost(1.0f);
  bQuery.add(q2, Occur.SHOULD);
 
  FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(bQuery,Product.class);
  // 添加是或者否的条件到query中
  boolean filterResult = false;
  BooleanQuery bQueryForFilter = new BooleanQuery();

  if (!startDate.equalsIgnoreCase("") && !endDate.equalsIgnoreCase("")) {
   // 时间过滤
   // RangeFilter rangefilter = new RangeFilter("pro_time",
   // "20090927","20090929", false, false);
   // 只能使用一个过滤器,所以只能用下面的RangeQuery,然后将所有query封装到一个过滤条件中
   TermRangeQuery rangeQuery = new TermRangeQuery("t",startDate,endDate,true,true);
   bQueryForFilter.add(rangeQuery, BooleanClause.Occur.MUST);
   filterResult = true;
  }
  if (filterResult) {
   // 将booleanQuery封装到Filter中
   Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(bQueryForFilter));
   fullTextQuery.setFilter(filter);
  }
 
  List<Product> result = fullTextQuery.list();
  String findResult;

//根据上边已经写好的query封装出一个查询计分器
  QueryScorer qs1 = new QueryScorer(q1);
  QueryScorer qs2 = new QueryScorer(q2);
  //设置高亮的模板,其实就是在关键字两边加一对html的格式标签,下面是最基本的加粗。
  Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
 
  Highlighter highlighter1 = new Highlighter(formatter,qs1);
  Highlighter highlighter2 = new Highlighter(formatter,qs2);
  String text;

//下面通过将上面根据关键字,过滤条件和权重排序等找出的结果集做一次循环,进行高亮,把高亮后得到的

//一个字符串,封装如每个实体类中的一个额外字段,方便在页面输出。
  for(Product product:result){
   text = product.getProTitle() ;
   findResult = highlighter1.getBestFragment(analyzer,"pt", text);
   if(findResult==null){
    text = product.getProDescn() ;
    highlighter2.setTextFragmenter(new SimpleFragmenter(30));
    findResult = highlighter2.getBestFragment(analyzer,"pd", text);
   }
   product.setFindResult(findResult);
  } 
  return result;
 }

//下面的方法是用hibernate search的方法来创建索引

 public void createIndexByHibernateSearch() {

  long startTime = new Date().getTime();
  int BATCH_SIZE = 1000;
  FullTextSession s = Search.createFullTextSession(productDao.getSession());

  // Transaction tr = s.beginTransaction();
  s.setFlushMode(FlushMode.MANUAL);
  s.setCacheMode(CacheMode.IGNORE);
  ScrollableResults results = s.createQuery("from Product").setFetchSize(BATCH_SIZE).scroll(ScrollMode.FORWARD_ONLY);
  int index = 0;
  while (results.next()) {
   index++;
   s.index(results.get(0)); // index each element
   if (index % BATCH_SIZE == 0) {
    // s.flushToIndexes(); //apply changes to indexes
    s.clear(); // clear since the queue is processed
   }
  }
  s.clear();
  long endTime = new Date().getTime();
  logger.warn("建立Product索引 , 这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
  // tr.commit();

 }

//下面的方法是用lucene的方式来创建索引文件,不过用这种方式创建索引后,也只能使用lucene的方式去进行搜索

 @SuppressWarnings("deprecation")
 public void createIndexByLucene() {
  try {
   File fsDir = new File("E://indexes//product");
   Analyzer analyzer = new IKAnalyzer();

  
   /* // 内存索引
      RAMDirectory ramDir = new RAMDirectory();
   IndexWriter ramWriter = new IndexWriter(ramDir, luceneAnalyzer,
     true, IndexWriter.MaxFieldLength.UNLIMITED);
   */

   IndexWriter fsWriter = new IndexWriter(
     FSDirectory.open(fsDir),
     analyzer,
     true,
     IndexWriter.MaxFieldLength.UNLIMITED
    );
   fsWriter.setMaxBufferedDocs(1000);
   fsWriter.setMergeFactor(1000);

   List<Product> productList = find("from Product");
   int size = productList.size();
   long startTime = new Date().getTime();
   Document doc;
   for (Product product : productList) {
    doc = new Document();
    doc.add(new Field("pro_title", product.getProTitle(),Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("pro_descn", product.getProDescn(),Field.Store.YES, Field.Index.ANALYZED));
    if(product.getProTime()!=null)
    doc.add(new Field("pro_time",DateTools.dateToString( product.getProTime(), Resolution.DAY),Field.Store.YES, Field.Index.NOT_ANALYZED));
    fsWriter.addDocument(doc);

    // 先缓存入内存索引,后写入文件索引
   /* ramWriter.addDocument(doc);
    int i = 1;
    i++;
    if (i % 100 == 0 || i == size) {
     logger.warn("i:" + i);
     ramWriter.close();
     fsWriter.addIndexesNoOptimize(new Directory[] { ramDir });
     ramWriter = new IndexWriter(ramDir, new StandardAnalyzer(),
       true, IndexWriter.MaxFieldLength.UNLIMITED);
    }*/


   }
   // 自动优化合并索引文件
   fsWriter.optimize();
   fsWriter.close();

   long endTime = new Date().getTime();
   System.out.println("一共" + size + ",这花费了" + (endTime - startTime)
     + " 毫秒来把文档增加到索引里面去!");
  

  } catch (Exception e) {
   e.printStackTrace();
  }
 }
 
 public void SearchByLucene(){
  createIndexByLucene();
  File fsDir = new File("E://luceneIndexes//product");
  Analyzer analyzer = new IKAnalyzer();
  try{
   // 索引查询
   IndexReader reader = IndexReader.open(FSDirectory.open(fsDir), true); // only searching, so read-only=true
   IndexSearcher isearcher = new IndexSearcher(reader);
  
   BooleanQuery booleanQuery = new BooleanQuery();
   QueryParser parser;
   Query query;
  
   parser = new QueryParser(Version.LUCENE_CURRENT,"pro_title",analyzer);
   query = parser.parse("大灯");// 检索词
   query.setBoost(1.5f);
   booleanQuery.add(query, Occur.SHOULD);
  
   parser = new QueryParser(Version.LUCENE_CURRENT,"pro_descn",analyzer);  
   query = parser.parse("大灯");// 检索词
   query.setBoost(1.0f);
   booleanQuery.add(query, Occur.SHOULD);
  
   BooleanQuery filterBooleanQuery = new BooleanQuery();
   TermRangeQuery rangeQuery = new TermRangeQuery("pro_time","20090101","20091101",true,true);
   filterBooleanQuery.add(rangeQuery, BooleanClause.Occur.MUST);
  
   // 将booleanQuery封装到Filter中
   Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(filterBooleanQuery));
  
   TopScoreDocCollector collector = TopScoreDocCollector.create(100,true);   
  
   isearcher.search(booleanQuery,filter,collector);
  
   ScoreDoc[] hits = collector.topDocs(0,100).scoreDocs;
   QueryScorer qs = new QueryScorer(new TermQuery(new Term("pro_title","大灯")));
  
   for(ScoreDoc h:hits){
    Document d = isearcher.doc(h.doc);
    String text = d.get("pro_title") ;
    Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
   
    Highlighter hl = new Highlighter(formatter,qs);
   
    System.out.println(hl.getBestFragment(analyzer,"pro_title", text));
    //System.out.println("doc:"+h.doc+"  /tscore:"+h.score+"      /t"+d.get("pro_title"));
   }
   System.out.println("命中:" + hits.length);
   isearcher.close();
  
  }catch(Exception e){
   e.printStackTrace();
  }
 
 }

 // 查看分词效果
 @SuppressWarnings("deprecation")
 public static void showAnalyzerResult(Analyzer analyzer, String s)
   throws Exception {
  StringReader reader = new StringReader(s);
  TokenStream ts = analyzer.tokenStream(s, reader);
  Token t = ts.next();
  while (t != null) {
   System.out.print(t.termText() + "   ");
   t = ts.next();
  }
  System.out.println();
 }

 public static void main(String[] args) {
  ApplicationContext ctx = new ClassPathXmlApplicationContext("spring/applicationContext.xml");
  ProductService service = (ProductService) ctx.getBean("productService");

  service.SearchByLucene();

 }
}

标签: Hibernate Lucene
来源:http://blog.csdn.net/fyg0072/article/details/5712099

推荐: