Author: echatellier Date: 2011-06-30 14:15:59 +0200 (Thu, 30 Jun 2011) New Revision: 1009 Url: http://nuiton.org/repositories/revision/wikitty/1009 Log: Gestion des nullfields et des champs indexés (analysé) Added: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/FieldModifier.java Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java Added: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/FieldModifier.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/FieldModifier.java (rev 0) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/FieldModifier.java 2011-06-30 12:15:59 UTC (rev 1009) @@ -0,0 +1,65 @@ +/* + * #%L + * + * + * $Id$ + * $HeadURL$ + * %% + * Copyright (C) 2011 Codelutin, Chatellier Eric + * %% + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/lgpl-3.0.html>. 
+ * #L% + */ + +package org.nuiton.wikitty.storage.lucene; + +import org.nuiton.wikitty.WikittyUtil; +import org.nuiton.wikitty.entities.FieldType; +import org.nuiton.wikitty.entities.WikittyExtension; +import org.nuiton.wikitty.search.operators.Element; +import org.nuiton.wikitty.storage.WikittyExtensionStorage; + +/** + * Field modifier. In lucene field are splited in. + * + * @author chatellier + * @version $Revision$ + * + * Last update : $Date$ + * By : $Author$ + */ +public class FieldModifier { + + protected WikittyExtensionStorage extensionStorage; + + public FieldModifier(WikittyExtensionStorage extensionStorage) { + this.extensionStorage = extensionStorage; + } + + public String getLuceneFieldName(String wikittyFieldName, boolean analyzed) { + String result; + if (Element.ELT_ID.equals(wikittyFieldName)) { + result = WikittyLuceneConstants.LUCENE_ID; + } else if (Element.ELT_EXTENSION.equals(wikittyFieldName)) { + result = WikittyLuceneConstants.LUCENE_EXTENSIONS; + } else if (analyzed) { + // TODO peut etre recupererle type suivant le champs là + result = wikittyFieldName + "_a"; + } else { + result = wikittyFieldName; + } + return result; + } +} Property changes on: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/FieldModifier.java ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision HeadURL Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java 2011-06-29 16:03:04 UTC (rev 1008) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java 2011-06-30 12:15:59 UTC (rev 1009) @@ -52,12 +52,12 @@ /** to use log facility, just put in your code: log.info(\"...\"); */ final static private Log log = LogFactory.getLog(LuceneUtil.class); - /** + /* * Get field 
name used in lucene storage for a field name in query. * * @param fieldName field name to convert * @return field name in lucene - */ + * public static String getLuceneFieldName(String fieldName) { String result; if (Element.ELT_ID.equals(fieldName)) { @@ -68,22 +68,22 @@ result = fieldName; } return result; - } + }*/ - /** + /* * get value of field in Document, field must have only one value * @param d * @param fieldname * @param type optional type to generate lucene field name * @return - */ + * static public Integer getIntFieldValue(Document d, String fieldname) { String luceneFieldName = getLuceneFieldName(fieldname); Object value = d.getFieldable(luceneFieldName).stringValue(); Integer result = convertToInteger(value, luceneFieldName); return result; - } + }*/ /** * Converti un Object en String, si l'objet est de type String un simple Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java 2011-06-29 16:03:04 UTC (rev 1008) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java 2011-06-30 12:15:59 UTC (rev 1009) @@ -59,7 +59,7 @@ import org.nuiton.wikitty.search.operators.Unlike; /** - * Concert wikitty restriction to lucene search query. + * Convert wikitty restriction to lucene search query. * * @author chatellier * @version $Revision$ @@ -72,12 +72,16 @@ /** Query parser depending on {@link Analyzer}. 
*/ protected QueryParser parser; - public Restriction2Lucene(Analyzer analyzer) { + protected FieldModifier fieldModifier; + + public Restriction2Lucene(Analyzer analyzer, FieldModifier fieldModifier) { + this.fieldModifier = fieldModifier; parser = new QueryParser(WikittySearchEngineLucene.WIKITTY_LUCENE_VERSION, - WikittyLuceneConstants.LUCENE_DEFAULT_FIELD, analyzer); + WikittyLuceneConstants.LUCENE_DEFAULT_FIELD + WikittyLuceneConstants.SUFFIX_ANALYZED, + analyzer); // allow "*" or "?" if first character wildcard query - parser.setAllowLeadingWildcard(true); + parser.setAllowLeadingWildcard(true); } /** @@ -170,9 +174,12 @@ } protected String element2Lucene(Element element) { + return element2Lucene(element, false); + } + + protected String element2Lucene(Element element, boolean analyzed) { String result = element.getName(); - //result = fieldModifer.convertToSolr(transaction, result); - result = LuceneUtil.getLuceneFieldName(result); + result = fieldModifier.getLuceneFieldName(result, analyzed); return result; } @@ -181,7 +188,9 @@ * @return */ protected Query isNotNull2Lucene(Null restriction) { - throw new NotImplementedException("Not yet implemented"); + String fieldName = WikittyLuceneConstants.LUCENE_NULL_FIELD + restriction.getFieldName(); + TermQuery query = new TermQuery(new Term(fieldName, "false")); + return query; } /** @@ -189,7 +198,9 @@ * @return */ protected Query isNull2Lucene(Null restriction) { - throw new NotImplementedException("Not yet implemented"); + String fieldName = WikittyLuceneConstants.LUCENE_NULL_FIELD + restriction.getFieldName(); + TermQuery query = new TermQuery(new Term(fieldName, "true")); + return query; } /** @@ -280,7 +291,14 @@ String fieldName = element2Lucene(restriction.getElement()); String value = restriction.getValue(); - TermRangeQuery query = new TermRangeQuery(fieldName, value, "*", true, true); + // ne fonctionne pas, mais devrait + // TermRangeQuery query = new TermRangeQuery(fieldName, value, "*", true, 
true); + + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term(fieldName, "*")), BooleanClause.Occur.MUST); + TermRangeQuery rangeQuery = new TermRangeQuery(fieldName, "*", value, true, true); + query.add(rangeQuery, BooleanClause.Occur.MUST_NOT); + return query; } @@ -292,7 +310,14 @@ String fieldName = element2Lucene(restriction.getElement()); String value = restriction.getValue(); - TermRangeQuery query = new TermRangeQuery(fieldName, value, "*", false, false); + // ne fonctionne pas, mais devrait + //TermRangeQuery query = new TermRangeQuery(fieldName, value, "*", false, false); + + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term(fieldName, "*")), BooleanClause.Occur.MUST); + TermRangeQuery rangeQuery = new TermRangeQuery(fieldName, "*", value, false, false); + query.add(rangeQuery, BooleanClause.Occur.MUST_NOT); + return query; } Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java 2011-06-29 16:03:04 UTC (rev 1008) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java 2011-06-30 12:15:59 UTC (rev 1009) @@ -67,5 +67,11 @@ static final public String TREENODE_ATTACHED = TREENODE_PREFIX + "attached."; /** Use as field on Wikitty object attached on TreeNode, TreeNodeId is added at end used for facetisation */ - static final public String TREENODE_ATTACHED_ALL = TREENODE_PREFIX + "attached-all"; + static final public String TREENODE_ATTACHED_ALL = TREENODE_PREFIX + "attached.all"; + + /** if field is null, this extra field is set to true otherwise is set to false */ + static final public String LUCENE_NULL_FIELD = WIKITTY_LUCENE_PREFIX + "nullfield."; + + /** Suffixe pour les champs stockés en facon analysé. 
*/ + static final public String SUFFIX_ANALYZED = "_a"; } Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java 2011-06-29 16:03:04 UTC (rev 1008) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java 2011-06-30 12:15:59 UTC (rev 1009) @@ -27,6 +27,7 @@ import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.LUCENE_DEFAULT_FIELD; import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.LUCENE_ID; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.SUFFIX_ANALYZED; import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_ATTACHED; import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_ATTACHED_ALL; import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_DEPTH; @@ -78,6 +79,7 @@ import org.nuiton.wikitty.WikittyConfigOption; import org.nuiton.wikitty.WikittyException; import org.nuiton.wikitty.WikittyUtil; +import org.nuiton.wikitty.entities.FieldType; import org.nuiton.wikitty.entities.FieldType.TYPE; import org.nuiton.wikitty.entities.Wikitty; import org.nuiton.wikitty.entities.WikittyTreeNode; @@ -127,6 +129,9 @@ /** Reader, writer and query analyzer. */ protected Analyzer indexAnalyzer; + /** Field modifier. */ + protected FieldModifier fieldModifier; + /** Lucene version used by wikitty. 
*/ protected static final Version WIKITTY_LUCENE_VERSION = Version.LUCENE_32; @@ -140,7 +145,6 @@ try { indexDirectory = getIndexDirectory(config); - indexAnalyzer = getIndexAnalyzer(config); // hack to create lucene index // new IndexSearcher fails with no index @@ -153,6 +157,10 @@ IOUtils.closeQuietly(writer); } } + + indexAnalyzer = getIndexAnalyzer(config); + + fieldModifier = new FieldModifier(extensionStorage); } catch (IOException ex) { throw new WikittyException("Can't init lucene directory", ex); } @@ -210,7 +218,8 @@ // default to StandardAnalyzer if (analyzer == null) { - analyzer = new StandardAnalyzer(Version.LUCENE_32); + analyzer = new StandardAnalyzer(WIKITTY_LUCENE_VERSION); + //analyzer = new KeywordAnalyzer(); } if (log.isInfoEnabled()) { @@ -707,6 +716,13 @@ /** * Convert a wikitty object to a lucene {@link Document}. * + * Le schema est le suivant: + * - n'est stocké (Store.YES) que ce qui peut servir à faire des facettes + * - n'est not tokennisé (Index.NOT_ANALYZED) ce qui peut servir a faire des facettes + * - dans le cas où le champ peut porter sur des requettes like + * il faut le stocker une deuxieme fois, tokenisé (Index.ANALYZED) c'est + * à dire tokenisé par l'analyser + * * @param w wikitty to convert * @return lucene document */ @@ -726,51 +742,91 @@ document.add(luceneExtField); } - StringBuffer allAsText = new StringBuffer(); // all other wikitty fields for (String wikyttyField : w.getAllFieldNames()) { String ext = WikittyUtil.getExtensionNameFromFQFieldName(wikyttyField); String fieldName = WikittyUtil.getFieldNameFromFQFieldName(wikyttyField); - if (w.getFieldType(wikyttyField).isCollection()) { + boolean hasFieldValue = false; + + // field value storing + FieldType type = w.getFieldType(wikyttyField); + if (type.isCollection()) { List<String> values = w.getFieldAsList(ext, fieldName, String.class); // Store.YES = mandatory for facets // Index.NOT_ANALYZED = mandatory for search on field if (values != null) { + hasFieldValue = 
true; for (String value : values) { - Field luceneField = new Field(wikyttyField, value, Store.YES, Index.NOT_ANALYZED); - document.add(luceneField); - - // copy content to #fulltext field - allAsText.append(value); + addSingleFieldToDocument(document, wikyttyField, type, value); } } } else { String value = w.getFieldAsString(ext, fieldName); if (StringUtils.isNotEmpty(value)) { - if (log.isTraceEnabled()) { - log.trace("add lucene field : " + wikyttyField + " = " + value); - } - - // Store.YES = mandatory for facets - // Index.ANALYZED = mandatory for search on field - Field luceneField = new Field(wikyttyField, value, Store.YES, Index.NOT_ANALYZED); - document.add(luceneField); - - // copy content to #fulltext field - allAsText.append(value); + hasFieldValue = true; + addSingleFieldToDocument(document, wikyttyField, type, value); } } + + // null field + // not field (no stored, no values) + Field luceneExtField = new Field(WikittyLuceneConstants.LUCENE_NULL_FIELD + wikyttyField, + String.valueOf(!hasFieldValue), Store.YES, Index.NOT_ANALYZED); + document.add(luceneExtField); } - // le champ text n'est pas stocké - Field luceneFullTextField = new Field(LUCENE_DEFAULT_FIELD, - allAsText.toString(), Store.NO, Index.ANALYZED); - document.add(luceneFullTextField); - return document; } + /** + * Ajout d'un champ unique dans un document lucene. Si la methode + * est appelée plusieurs fois avec le même champ (fqFieldName), le champ + * sera multivalué. 
+ * + * @param document lucene document + * @param fqFieldName field name + * @param type field type + * @param value value + */ + protected void addSingleFieldToDocument(Document document, String fqFieldName, FieldType type, String value) { + if (log.isTraceEnabled()) { + log.trace("add lucene field : " + fqFieldName + " = " + value); + } + + // Store.YES = mandatory for facets + // Index.NOT_ANALYZED = mandatory for strict equality + Field luceneField = new Field(fqFieldName, value, Store.YES, Index.NOT_ANALYZED); + document.add(luceneField); + + switch (type.getType()) { + case STRING: + + // Store.NO = pas de facette dessus + // Index.ANALYZED = mandatory for strict equality (pour recherche like) + Field aLuceneField = new Field(fqFieldName + SUFFIX_ANALYZED, value, Store.NO, Index.ANALYZED); + document.add(aLuceneField); + + // copy content to #fulltext field (multivalued) + Field luceneFullTextField = new Field(LUCENE_DEFAULT_FIELD, value, Store.YES, Index.NOT_ANALYZED); + document.add(luceneFullTextField); + + // copy content to #fulltext_a field (multivalued) + Field aLuceneFullTextField = new Field(LUCENE_DEFAULT_FIELD + SUFFIX_ANALYZED, value, Store.NO, Index.ANALYZED); + document.add(aLuceneFullTextField); + + break; + case BINARY: + case DATE: + case BOOLEAN: + case NUMERIC: + case WIKITTY: + break; + default : + throw new WikittyException("Not managed wikitty type : " + type.getType()); + } + } + /* * @see org.nuiton.wikitty.storage.WikittySearchEngine#delete(org.nuiton.wikitty.services.WikittyTransaction, java.util.Collection) */ @@ -819,12 +875,14 @@ PagedResult<String> pagedResult = null; IndexReader indexReader = null; + //IndexSearcher indexSearcher = null; try { indexReader = IndexReader.open(indexDirectory); + //indexSearcher = new IndexSearcher(indexDirectory); // get lucene query Restriction restriction = criteria.getRestriction(); - Restriction2Lucene restriction2Lucene = new Restriction2Lucene(indexAnalyzer); + Restriction2Lucene 
restriction2Lucene = new Restriction2Lucene(indexAnalyzer, fieldModifier); Query query = restriction2Lucene.toLucene(restriction); if (log.isDebugEnabled()) { @@ -875,7 +933,8 @@ br.setOffset(firstIndex); br.setCount(endIndex - firstIndex); br.setQuery(query); - if (sortFields.isEmpty()) { + + if (!sortFields.isEmpty()) { SortField[] sortFieldsA = sortFields.toArray(new SortField[sortFields.size()]); br.setSort(sortFieldsA); } @@ -893,13 +952,14 @@ } } - /*// manage criteria facets + //// manage criteria facets List<Criteria> facetCriterias = criteria.getFacetCriteria(); if (facetCriterias != null) { - for (Criteria facetCriteria : facetCriterias) { - Query facetQuery = restriction2Lucene.toLucene(facetCriteria.getRestriction()); - } - }*/ + throw new WikittyException("Facet criteria not supported yet"); + // for (Criteria facetCriteria : facetCriterias) { + // Query facetQuery = restriction2Lucene.toLucene(facetCriteria.getRestriction()); + // } + } // perform request Browsable browser = new BoboBrowser(boboReader); @@ -943,25 +1003,25 @@ pagedResult = new PagedResult<String>(firstIndex, pagedResultCount, query.toString(), pagedResultFacets, results); - // execute search query - /*List<String> results = new ArrayList<String>(); + /*// execute search query + List<String> results = new ArrayList<String>(); int totalHitCount = 0; if (endIndex == 0) { // il faut un cas special pour le 0, sinon lucene rale. 
// Utilisation d'un collector qui ne fait que compter // le nombre de résultat TotalHitCountCollector collector = new TotalHitCountCollector(); - searcher.search(query, null, collector); + indexSearcher.search(query, null, collector); totalHitCount = collector.getTotalHits(); } else { // ca où il y a vraiment des documents retourné par la requete TopDocs topDocs = null; if (sortFields.isEmpty()) { - topDocs = searcher.search(query, null, endIndex); + topDocs = indexSearcher.search(query, null, endIndex); } else { Sort sortOption = new Sort(sortFields.toArray(new SortField[sortFields.size()])); - topDocs = searcher.search(query, null, endIndex, sortOption); + topDocs = indexSearcher.search(query, null, endIndex, sortOption); } ScoreDoc[] scoreDocs = topDocs.scoreDocs; @@ -970,7 +1030,7 @@ for (ScoreDoc scoreDoc : scoreDocs) { if (currentDocIndex >= firstIndex && currentDocIndex <= endIndex) { int luceneId = scoreDoc.doc; - Document document = searcher.doc(luceneId); + Document document = indexSearcher.doc(luceneId); String wikittyId = document.get(WikittyLuceneConstants.LUCENE_ID); results.add(wikittyId); } @@ -988,6 +1048,7 @@ throw new WikittyException("Can't search on index", ex); } finally { IOUtils.closeQuietly(indexReader); + //IOUtils.closeQuietly(indexSearcher); } return pagedResult; @@ -1096,7 +1157,7 @@ // on a dans treeSearch uniquement le noeud passe en parametre // et ses enfants jusqu'a la profondeur demandee Restriction2Lucene restriction2Lucene = - new Restriction2Lucene(indexAnalyzer); + new Restriction2Lucene(indexAnalyzer, fieldModifier); Query query = restriction2Lucene.toLucene(treeCriteria.getRestriction()); TopDocs topDocs = searcher.search(query, null, 1000); //SolrQuery query = new SolrQuery(SOLR_QUERY_PARSER + queryString);