r489 - in bobobrowselucenefacets: . src/main/java/org/bobo
Author: echatellier Date: 2011-08-26 14:41:10 +0200 (Fri, 26 Aug 2011) New Revision: 489 Url: http://nuiton.org/repositories/revision/sandbox/489 Log: Add lucene grouping and use it in bobo-browse Added: bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneGroupsFacets.java Modified: bobobrowselucenefacets/pom.xml bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneDateFacets.java Modified: bobobrowselucenefacets/pom.xml =================================================================== --- bobobrowselucenefacets/pom.xml 2011-08-25 15:37:20 UTC (rev 488) +++ bobobrowselucenefacets/pom.xml 2011-08-26 12:41:10 UTC (rev 489) @@ -11,6 +11,11 @@ <version>3.3.0</version> </dependency> <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-grouping</artifactId> + <version>3.3.0</version> + </dependency> + <dependency> <groupId>com.browseengine</groupId> <artifactId>bobo-browse</artifactId> <version>2.5.0</version> @@ -25,6 +30,7 @@ <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> <version>3.0.1</version> + <scope>compile</scope> </dependency> </dependencies> </project> \ No newline at end of file Modified: bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneDateFacets.java =================================================================== --- bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneDateFacets.java 2011-08-25 15:37:20 UTC (rev 488) +++ bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneDateFacets.java 2011-08-26 12:41:10 UTC (rev 489) @@ -64,15 +64,21 @@ import com.browseengine.bobo.api.BrowseHit; import com.browseengine.bobo.api.BrowseRequest; import com.browseengine.bobo.api.BrowseResult; +import com.browseengine.bobo.api.BrowseSelection; import com.browseengine.bobo.api.FacetAccessible; import com.browseengine.bobo.api.FacetSpec; import com.browseengine.bobo.api.FacetSpec.FacetSortSpec; import com.browseengine.bobo.facets.FacetHandler; import com.browseengine.bobo.facets.impl.RangeFacetHandler; +import com.browseengine.bobo.facets.impl.SimpleFacetHandler; /** - * TODO add comment here. + * Test les facets sur les range de dates * + * Test les facets sur les champs qui "contiennent" une valeur: + * ex : resultat avec requete dans le champ 1 : 34 + * resultat avec requete dans le champ 2 : 234 + * * @author chatellier * @version $Revision$ * @@ -118,19 +124,22 @@ Document doc1 = new Document(); doc1.add(new Field("id", "1", Store.YES, Index.ANALYZED)); - doc1.add(new Field("author", "red", Store.YES, Index.ANALYZED)); + doc1.add(new Field("title", "test de title", Store.YES, Index.ANALYZED)); + doc1.add(new Field("body", "test de body", Store.YES, Index.ANALYZED)); doc1.add(new Field("date", DateTools.timeToString(now.getTime(),MILLISECOND), Store.YES, Index.ANALYZED)); writer.addDocument(doc1); Document doc2 = new Document(); doc2.add(new Field("id", "2", Store.YES, Index.ANALYZED)); - doc2.add(new Field("author", "blue", Store.YES, Index.ANALYZED)); + doc2.add(new Field("title", "un body blue", Store.YES, Index.ANALYZED)); + doc2.add(new Field("body", "le corps du message", Store.YES, Index.ANALYZED)); doc2.add(new Field("date", DateTools.timeToString(nowplus1.getTime(),MILLISECOND), Store.YES, Index.ANALYZED)); writer.addDocument(doc2); Document doc3 = new Document(); doc3.add(new Field("id", "3", Store.YES, Index.ANALYZED)); - doc3.add(new Field("author", "blue", Store.YES, Index.ANALYZED)); + doc3.add(new Field("title", "un corps large", Store.YES, Index.ANALYZED)); + doc2.add(new Field("body", "un body qui tue", Store.YES, Index.ANALYZED)); doc3.add(new Field("date", DateTools.timeToString(nowplus7.getTime(),MILLISECOND), Store.YES, Index.ANALYZED)); writer.addDocument(doc3); @@ -162,8 +171,9 @@ // color facet handler RangeFacetHandler dateHandler = new RangeFacetHandler("date", dateRange); + SimpleFacetHandler titleFacet = new SimpleFacetHandler("title"); - List<FacetHandler<?>> handlerList = Arrays.asList(new FacetHandler<?>[] { dateHandler }); + List<FacetHandler<?>> handlerList = Arrays.asList(new FacetHandler<?>[] { dateHandler, titleFacet }); // opening a lucene index IndexReader reader = IndexReader.open(getIndexDirectory(), true); @@ -177,9 +187,9 @@ br.setOffset(0); // add a selection - //BrowseSelection sel = new BrowseSelection("color"); - //sel.addValue("red"); - //br.addSelection(sel); + BrowseSelection sel = new BrowseSelection("title"); + sel.addValue("bodyintitle"); + br.addSelection(sel); // parse a query QueryParser parser = new QueryParser(Version.LUCENE_33, "content", analyser); @@ -189,8 +199,10 @@ // add the facet output specs FacetSpec dateSpec = new FacetSpec(); dateSpec.setOrderBy(FacetSortSpec.OrderHitsDesc); - br.setFacetSpec("date", dateSpec); + FacetSpec titleSpec = new FacetSpec(); + titleSpec.setOrderBy(FacetSortSpec.OrderHitsDesc); + br.setFacetSpec("title", titleSpec); // perform browse Browsable browser = new BoboBrowser(boboReader); @@ -203,6 +215,8 @@ FacetAccessible dateFacets = facetMap.get("date"); List<BrowseFacet> facetVals = dateFacets.getFacets(); - System.out.println("Facet " + facetVals); + for (BrowseFacet facetVal : facetVals) { + System.out.println("Facet " + facetVal.getValue() + "(" + facetVal.getFacetValueHitCount() + ")"); + } } } Added: bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneGroupsFacets.java =================================================================== --- bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneGroupsFacets.java (rev 0) +++ bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneGroupsFacets.java 2011-08-26 12:41:10 UTC (rev 489) @@ -0,0 +1,264 @@ +/* + * #%L + * + * + * $Id$ + * $HeadURL$ + * %% + * Copyright (C) 2011 Codelutin, Chatellier Eric + * %% + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/lgpl-3.0.html>. + * #L% + */ + +package org.bobo; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.CachingCollector; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.grouping.GroupDocs; +import org.apache.lucene.search.grouping.SearchGroup; +import org.apache.lucene.search.grouping.TermFirstPassGroupingCollector; +import org.apache.lucene.search.grouping.TermSecondPassGroupingCollector; +import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.util.Version; + +import com.browseengine.bobo.api.BoboBrowser; +import com.browseengine.bobo.api.BoboIndexReader; +import com.browseengine.bobo.api.Browsable; +import com.browseengine.bobo.api.BrowseException; +import com.browseengine.bobo.api.BrowseFacet; +import com.browseengine.bobo.api.BrowseRequest; +import com.browseengine.bobo.api.BrowseSelection; +import com.browseengine.bobo.api.FacetAccessible; +import com.browseengine.bobo.api.FacetSpec; +import com.browseengine.bobo.api.FacetSpec.FacetSortSpec; +import com.browseengine.bobo.facets.FacetHandler; +import com.browseengine.bobo.facets.impl.SimpleFacetHandler; + +/** + * Test les facets sur les range le collecteurs basé sur les groupes, en lien + * avec bobo. + * + * @author chatellier + * @version $Revision$ + * + * Last update : $Date$ + * By : $Author$ + */ +public class TestLuceneGroupsFacets { + + protected static Analyzer analyser = new StandardAnalyzer(Version.LUCENE_32); + + public static void main(String... args) throws IOException, ParseException, BrowseException { + create(); + search(); + } + + /** + * Get lucene index directory. + * + * @return lucene index directory + * @throws IOException + */ + protected static Directory getIndexDirectory() throws IOException { + File path = new File("/tmp/bobo/index"); + Directory directory = new NIOFSDirectory(path); + return directory; + } + + public static void create() throws CorruptIndexException, LockObtainFailedException, IOException { + + IndexWriter writer = new IndexWriter(getIndexDirectory(), new IndexWriterConfig(Version.LUCENE_33, analyser)); + writer.deleteAll(); + + Document doc1 = new Document(); + doc1.add(new Field("id", "1", Store.YES, Index.ANALYZED)); + doc1.add(new Field("subject", "s1", Store.YES, Index.ANALYZED)); + doc1.add(new Field("title", "serie : bing bang theory", Store.YES, Index.ANALYZED)); + writer.addDocument(doc1); + + Document doc2 = new Document(); + doc2.add(new Field("id", "2", Store.YES, Index.ANALYZED)); + doc2.add(new Field("subject", "s1", Store.YES, Index.ANALYZED)); + doc2.add(new Field("title", "serie : dr house ", Store.YES, Index.ANALYZED)); + writer.addDocument(doc2); + + Document doc3 = new Document(); + doc3.add(new Field("id", "3", Store.YES, Index.ANALYZED)); + doc3.add(new Field("subject", "s1", Store.YES, Index.ANALYZED)); + doc3.add(new Field("title", "serie : dexter", Store.YES, Index.ANALYZED)); + writer.addDocument(doc3); + + Document doc4 = new Document(); + doc4.add(new Field("id", "4", Store.YES, Index.ANALYZED)); + doc4.add(new Field("subject", "s2", Store.YES, Index.ANALYZED)); + doc4.add(new Field("title", "serie : true blood", Store.YES, Index.ANALYZED)); + writer.addDocument(doc4); + + Document doc5 = new Document(); + doc5.add(new Field("id", "5", Store.YES, Index.ANALYZED)); + doc5.add(new Field("subject", "s2", Store.YES, Index.ANALYZED)); + doc5.add(new Field("title", "film : independance day", Store.YES, Index.ANALYZED)); + writer.addDocument(doc5); + + + writer.optimize(); + writer.close(); + } + + public static void search() throws IOException, ParseException, BrowseException { + + // subject facet handler + SimpleFacetHandler subjectFacet = new SimpleFacetHandler("subject"); + + /*LinkedHashSet<String> groupbyNames = new LinkedHashSet<String>(); + groupbyNames.add("subject"); + SimpleGroupbyFacetHandler groupBy = new SimpleGroupbyFacetHandler("subject", groupbyNames);*/ + + List<FacetHandler<?>> handlerList = Arrays.asList(new FacetHandler<?>[] { subjectFacet }); + + // opening a lucene index + IndexReader reader = IndexReader.open(getIndexDirectory(), true); + IndexSearcher searcher = new IndexSearcher(reader); + + // decorate it with a bobo index reader + BoboIndexReader boboReader = BoboIndexReader.getInstance(reader, handlerList); + + // creating a browse request + BrowseRequest br = new BrowseRequest(); + br.setCount(10); + br.setOffset(0); + + // add a selection + BrowseSelection sel = new BrowseSelection("title"); + sel.addValue("bodyintitle"); + br.addSelection(sel); + + // parse a query + QueryParser parser = new QueryParser(Version.LUCENE_33, "content", analyser); + //Query q = parser.parse("*:*"); + Query q = parser.parse("title:serie"); + br.setQuery(q); + + // add the facet output specs + FacetSpec dateSpec = new FacetSpec(); + dateSpec.setOrderBy(FacetSortSpec.OrderHitsDesc); + br.setFacetSpec("subject", dateSpec); + + // perform browse + Browsable browser = new BoboBrowser(boboReader); + //BrowseResult result = browser.browse(br); + //Map<String, FacetAccessible> facetMap = new HashMap<String, FacetAccessible>(); + //browse(br, collector, facetMap); + Map<String, FacetAccessible> facetMap = getGroupCollector(searcher, browser, br); + + //int totalHits = result.getNumHits(); + //BrowseHit[] hits = result.getHits(); + + if (facetMap != null) { + FacetAccessible dateFacets = facetMap.get("subject"); + List<BrowseFacet> facetVals = dateFacets.getFacets(); + for (BrowseFacet facetVal : facetVals) { + System.out.println("Facet subject " + facetVal.getValue() + "(" + facetVal.getFacetValueHitCount() + ")"); + } + } + } + + public static Map<String, FacetAccessible> getGroupCollector(IndexSearcher searcher, Browsable browser, BrowseRequest br) throws IOException, BrowseException { + + // first pass : + TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("subject", Sort.RELEVANCE, 100); + + CachingCollector cachedCollector = CachingCollector.create(c1, true, 4.0); + + searcher.search(br.getQuery(), cachedCollector); + + Collection<SearchGroup<String>> topGroups = c1.getTopGroups(0, true); + + if (topGroups == null) { + // No groups matched + return null; + } + + boolean getScores = true; + boolean getMaxScores = true; + boolean fillFields = true; + TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("subject", + topGroups, Sort.RELEVANCE, Sort.RELEVANCE, 1, + getScores, getMaxScores, fillFields); + + //Optionally compute total group count + /*boolean requiredTotalGroupCount = true; + TermAllGroupsCollector allGroupsCollector = null; + if (requiredTotalGroupCount) { + allGroupsCollector = new TermAllGroupsCollector("author"); + c2 = (TermSecondPassGroupingCollector)MultiCollector.wrap(c2, allGroupsCollector); + }*/ + + /*if (cachedCollector.isCached()) { + // Cache fit within maxCacheRAMMB, so we can replay it: + cachedCollector.replay(c2); + } else { + // Cache was too large; must re-execute query: + searcher.search(br.getQuery(), c2); + }*/ + + Map<String, FacetAccessible> facetMap = new HashMap<String, FacetAccessible>(); + browser.browse(br, c2, facetMap); + + TopGroups groupsResult = c2.getTopGroups(0); + System.out.println("Groups found : " + groupsResult.groups.length); + /*if (requiredTotalGroupCount) { + groupsResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount()); + }*/ + + //c2. + for (GroupDocs group : groupsResult.groups) { + System.out.println(group.groupValue + " (total : " + group.totalHits + ")"); + for (ScoreDoc doc : group.scoreDocs) { + System.out.println(" > " + doc.doc); + } + } + + // Render groupsResult... + return facetMap; + } +} Property changes on: bobobrowselucenefacets/src/main/java/org/bobo/TestLuceneGroupsFacets.java ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision HeadURL
participants (1)
-
echatellier@users.nuiton.org