Author: glandais Date: 2007-12-17 10:57:23 +0000 (Mon, 17 Dec 2007) New Revision: 66 Added: trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/SearcherPool.java Modified: trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/Database.java trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/LuceneDatabase.java Log: MAJ interface Database Searcher pool Modified: trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/Database.java =================================================================== --- trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/Database.java 2007-12-11 17:08:37 UTC (rev 65) +++ trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/Database.java 2007-12-17 10:57:23 UTC (rev 66) @@ -22,9 +22,20 @@ * * @throws Exception */ - public abstract void open() throws Exception; + public void open() throws Exception { + open(false); + } /** + * Open database connection + * + * @param create + * Create database + * @throws Exception + */ + public abstract void open(boolean create) throws Exception; + + /** * Close database connection * * @throws Exception @@ -47,18 +58,13 @@ * Element to insert * @throws Exception */ - public abstract void insertElement(MetaDataEntity element, List<Reader> readers, boolean forceCommit) throws Exception; + public abstract void insertElement(MetaDataEntity element, + List<Reader> readers) throws Exception; - public void insertElement(MetaDataEntity element, List<Reader> readers) - throws Exception { - insertElement(element, readers, true); - } - // Read /** - * Get an element from its id and its version - * null if not fund + * Get an element from its id and its version null if not fund * * @param uuid * Id @@ -71,8 +77,8 @@ throws Exception; /** - * Get all version of an element thanks to its id - * Empty list if no element with this id + * Get all version of an element thanks to its id Empty list if no element + * with this id * * @param uuid * Id @@ -82,8 +88,7 @@ public abstract List<Version> getVersions(String uuid) throws Exception; /** - * Retrieve an element in its latest version - * null if not fund + * Retrieve an element in its latest version null if not fund * * @param uuid * Id @@ -115,8 +120,7 @@ } /** - * Retrieve all elements by id - * Empty list if no element with this id + * Retrieve all elements by id Empty list if no element with this id * * @param uuid * Id @@ -127,7 +131,7 @@ throws Exception; /** - * Retrieve elements with specific properties + * Retrieve elements with specific properties<br> * Empty list if no element * * @param properties @@ -135,11 +139,41 @@ * @return Element list * @throws Exception */ + public Set<MetaDataEntity> findElementsByProperties( + Map<String, String> properties) throws Exception { + return findElementsByProperties(properties, 0, -1); + } + + /** + * Retrieve elements with specific properties<br> + * Empty list if no element + * + * @param properties + * Matching properties needed + * @param start + * Index of first element returned + * @param count + * Number of elements to return + * @return Element list + * @throws Exception + */ public abstract Set<MetaDataEntity> findElementsByProperties( + Map<String, String> properties, int start, int count) + throws Exception; + + /** + * Retrieve element count specific properties + * + * @param properties + * Matching properties needed + * @return Number of elements fund + * @throws Exception + */ + public abstract int findElementsByPropertiesCount( Map<String, String> properties) throws Exception; /** - * Retrieve elements by content search + * Retrieve elements by content search<br> * Empty list if no element * * @param searchedText @@ -147,9 +181,38 @@ * @return Element list * @throws Exception */ + public Set<MetaDataEntity> findElementsByContentSearch(String queryText) + throws Exception { + return findElementsByContentSearch(queryText, 0, -1); + } + + /** + * Retrieve elements by content search<br> + * Empty list if no element + * + * @param queryText + * Searched text + * @param start + * Index of first element returned + * @param count + * Number of elements to return + * @return Element list + * @throws Exception + */ public abstract Set<MetaDataEntity> findElementsByContentSearch( - String queryText) throws Exception; + String queryText, int start, int count) throws Exception; + /** + * Retrieve element count with specific content + * + * @param queryText + * Searched text + * @return Number of elements fund + * @throws Exception + */ + public abstract int findElementsByContentSearchCount(String queryText) + throws Exception; + // Delete /** Modified: trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/LuceneDatabase.java =================================================================== --- trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/LuceneDatabase.java 2007-12-11 17:08:37 UTC (rev 65) +++ trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/LuceneDatabase.java 2007-12-17 10:57:23 UTC (rev 66) @@ -1,7 +1,8 @@ package fr.cemagref.simexplorer.is.storage.database.lucene; +import java.io.File; +import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -12,12 +13,11 @@ import java.util.Map.Entry; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; @@ -30,6 +30,8 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockFactory; +import org.apache.lucene.store.NoLockFactory; import fr.cemagref.simexplorer.is.storage.database.Database; import fr.cemagref.simexplorer.is.storage.entities.metadata.MetaDataEntity; @@ -44,77 +46,125 @@ public class LuceneDatabase extends Database { /** - * Content analyzer + * Lock factory for Lucene writer */ - private static Analyzer analyser; + private static LockFactory lockFactory = new NoLockFactory(); /** - * Index location + * Location of index */ - private static String indexDirectory; + private Directory directory; /** - * Database writer + * Index writer */ - private static IndexWriter writer; + private IndexWriter writer = null; /** - * Database searcher + * Pool of searcher */ - private static Searcher searcher; + private SearcherPool searcherPool = new SearcherPool(); /** - * Init flag + * Searcher that should be used for new queries */ - private static boolean initok = false; + private Searcher cachedSearcher = null; + /** + * Date of latest optimization + */ + private Date lastOptimize = new Date(); - @Override - public void open() throws Exception { - if (!initok) { - // Create a per field analyze - analyser = new PerFieldAnalyzerWrapper(new SimpleAnalyzer()); + /** + * Retrieve searcher for a new query + * + * @return Searcher + * @throws CorruptIndexException + * @throws IOException + */ + private Searcher getSearcher() throws CorruptIndexException, IOException { + synchronized (this) { + // If cached searcher is null (as after an index write) + if (cachedSearcher == null) { + // Instanciate a new searcher + cachedSearcher = new IndexSearcher(directory); + } + // Mark searcher as being used in a new query + searcherPool.capture(cachedSearcher); + return cachedSearcher; + } + } - // Specific analyzers - // analyser.addAnalyzer(KEY_, new KeywordAnalyzer()); - // analyser.addAnalyzer(KEY_, new KeywordAnalyzer()); + /** + * Release searcher, after the end of a query execution + * + * @param searcher + * Searcher to be released + * @throws IOException + */ + private void releaseSearcher(Searcher searcher) throws IOException { + synchronized (this) { + searcherPool.release(searcher); + } + } - // FIXME index location - indexDirectory = "./index/"; - - // Create index if it does not exist - boolean create = false; - if (!IndexReader.indexExists(indexDirectory)) { - create = true; + /** + * Called to invalidate reader + * + * @throws IOException + */ + private void resetReader() throws IOException { + synchronized (this) { + // Reset searcher + if (cachedSearcher != null) { + // Mark as should be closed asap + searcherPool.mark(cachedSearcher); + cachedSearcher = null; } + } + } - // Create a Store for Lucene DB - Directory dir = FSDirectory.getDirectory(indexDirectory); + @Override + public void open(boolean create) throws Exception { + // Create an analyzer + Analyzer analyzer = new SimpleAnalyzer(); - // Create Lucene indexer - writer = new IndexWriter(dir, true, analyser, create); - - searcher = new IndexSearcher(indexDirectory); - - // FIXME - System.out.println("writer.docCount() : " + writer.docCount()); - - initok = true; - } + // FIXME index location + File indexDir = new File("./index/"); + + // Base directory index + directory = FSDirectory.getDirectory(indexDir, lockFactory); + + // Instanciate unique writer + writer = new IndexWriter(directory, true, analyzer, create); } @Override public void close() throws Exception { - // Close writer writer.close(); - searcher.close(); - initok = false; + resetReader(); } + private synchronized void synchedCommit() throws CorruptIndexException, + IOException { + // Flush in ram data + writer.flush(); + // Check if an optimization is required + Date now = new Date(); + long elapsed = now.getTime() - lastOptimize.getTime(); + // One optimize per hour, if readers are closed + if (elapsed > 3600 * 1000 && searcherPool.allClosed()) { + writer.optimize(); + lastOptimize = new Date(); + } + //System.out.println(writer.docCount()); + // Invalidate reader + resetReader(); + } + @Override public void commit() throws Exception { - // write index - writer.flush(); - searcher = new IndexSearcher(indexDirectory); + synchedCommit(); } - + @Override - public void insertElement(MetaDataEntity element, List<Reader> readers, boolean forceCommit) throws Exception { + public void insertElement(MetaDataEntity element, List<Reader> readers) + throws Exception { // Save element to a Lucene document Document document = saveLuceneElement(element, readers); @@ -129,9 +179,6 @@ // add document to index writer.addDocument(document); - if (forceCommit) { - commit(); - } } @Override @@ -139,10 +186,17 @@ throws Exception { MetaDataEntity result = null; - Hits hits = getHitsByIdVersion(uuid, version); - if (hits.length() != 0) { - // convert first document to element - result = loadLuceneElement(hits.doc(0)); + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + Hits hits = getHitsByIdVersion(uuid, version, searcher); + if (hits != null && hits.length() != 0) { + // convert first document to element + result = loadLuceneElement(hits.doc(0)); + } + } finally { + // Release searcher instance + releaseSearcher(searcher); } return result; @@ -152,17 +206,18 @@ * @param uuid * @param version * @return + * @throws IOException * @throws Exception */ - private Hits getHitsByIdVersion(String uuid, Version version) - throws Exception { + private Hits getHitsByIdVersion(String uuid, Version version, + Searcher searcher) throws IOException { // Retrieve element with id/version Map<String, String> properties = new HashMap<String, String>(); properties.put(KEY_UUID, uuid); properties.put(KEY_VERSION, version.toString()); // Search elements - Hits hits = findHits(properties); + Hits hits = findHits(properties, searcher); return hits; } @@ -171,13 +226,24 @@ // Retrieve all document corresponding to id Map<String, String> properties = new HashMap<String, String>(); properties.put(KEY_UUID, uuid); - Hits hits = findHits(properties); - // Add all versions to a list - List<Version> versions = new ArrayList<Version>(); - for (int i = 0; i < hits.length(); i++) { - Document doc = hits.doc(i); - versions.add(new Version(doc.get(KEY_VERSION))); + List<Version> versions = null; + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + Hits hits = findHits(properties, searcher); + + // Add all versions to a list + versions = new ArrayList<Version>(); + if (hits != null) { + for (int i = 0; i < hits.length(); i++) { + Document doc = hits.doc(i); + versions.add(new Version(doc.get(KEY_VERSION))); + } + } + } finally { + // Release searcher instance + releaseSearcher(searcher); } return versions; @@ -188,40 +254,118 @@ // Create hash map with id Map<String, String> properties = new HashMap<String, String>(); properties.put(KEY_UUID, uuid); + + Set<MetaDataEntity> result = null; + + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + Hits hits = findHits(properties, searcher); + result = convertHitsToElements(hits, 0, -1); + } finally { + // Release searcher instance + releaseSearcher(searcher); + } + // Return all elements parsed from documents fund - return convertHitsToElements(findHits(properties)); + return result; } @Override + public int findElementsByPropertiesCount(Map<String, String> properties) + throws Exception { + int result = 0; + + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + Hits hits = findHits(properties, searcher); + result = hits.length(); + } finally { + // Release searcher instance + releaseSearcher(searcher); + } + + // Return all elements parsed from documents fund + return result; + } + + @Override public Set<MetaDataEntity> findElementsByProperties( - Map<String, String> properties) throws Exception { + Map<String, String> properties, int start, int count) + throws Exception { + Set<MetaDataEntity> result = null; + + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + Hits hits = findHits(properties, searcher); + result = convertHitsToElements(hits, start, count); + } finally { + // Release searcher instance + releaseSearcher(searcher); + } + // Return all elements parsed from documents fund - return convertHitsToElements(findHits(properties)); + return result; } @Override - public Set<MetaDataEntity> findElementsByContentSearch(String queryText) throws Exception { + public int findElementsByContentSearchCount(String queryText) + throws Exception { Analyzer analyzer = new SimpleAnalyzer(); QueryParser parser = new QueryParser(KEY_SEARCHABLE_CONTENT, analyzer); Query luceneQuery = parser.parse(queryText); - // Create a bean with hits and searcher - Hits luceneHits = searcher.search(luceneQuery); + int result = 0; - // Convert hits to elements - Set<MetaDataEntity> result = convertHitsToElements(luceneHits); + Hits hits = null; + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + hits = searcher.search(luceneQuery); + // Return hits length + result = hits.length(); + } finally { + // Release searcher instance + releaseSearcher(searcher); + } return result; } @Override + public Set<MetaDataEntity> findElementsByContentSearch(String queryText, + int start, int count) throws Exception { + Analyzer analyzer = new SimpleAnalyzer(); + QueryParser parser = new QueryParser(KEY_SEARCHABLE_CONTENT, analyzer); + Query luceneQuery = parser.parse(queryText); + + Set<MetaDataEntity> result = null; + + Hits hits = null; + // Get current searcher instance + Searcher searcher = getSearcher(); + try { + hits = searcher.search(luceneQuery); + // Convert hits to elements + result = convertHitsToElements(hits, start, count); + } finally { + // Release searcher instance + releaseSearcher(searcher); + } + + return result; + } + + @Override public void deleteElement(String uuid, Version version) throws Exception { // Delete element in db with term search Term[] terms = new Term[2]; terms[0] = new Term(KEY_UUID, uuid); terms[1] = new Term(KEY_VERSION, version.toString()); + writer.deleteDocuments(terms); - commit(); } /** @@ -230,10 +374,11 @@ * @param properties * criteria * @return Documents and search handle + * @throws IOException * @throws Exception */ - private Hits findHits(Map<String, String> properties) - throws Exception { + private Hits findHits(Map<String, String> properties, Searcher searcher) + throws IOException { // Create a query with all parameters BooleanQuery query = new BooleanQuery(); for (Entry<String, String> kv : properties.entrySet()) { @@ -241,9 +386,8 @@ BooleanClause.Occur.MUST); } - // Create a bean with hits and searcher - Hits luceneHits = searcher.search(query); - return luceneHits; + Hits hits = searcher.search(query); + return hits; } /** @@ -251,17 +395,26 @@ * * @param hits * Document collection + * @param start + * Index of first document to return + * @param count + * Number of documents to return * @return Entities list + * @throws IOException + * @throws CorruptIndexException * @throws Exception */ - private Set<MetaDataEntity> convertHitsToElements(Hits hits) - throws Exception { + private Set<MetaDataEntity> convertHitsToElements(Hits hits, int start, + int count) throws CorruptIndexException, IOException { Set<MetaDataEntity> elements = new HashSet<MetaDataEntity>(); - // For all documents - for (int i = 0; i < hits.length(); i++) { - Document document = hits.doc(i); - // Convert to element - elements.add(loadLuceneElement(document)); + if (hits != null) { + // For documents in range + for (int i = start; i < hits.length() + && (i < start + count || count == -1); i++) { + Document document = hits.doc(i); + // Convert to element + elements.add(loadLuceneElement(document)); + } } return elements; } @@ -276,8 +429,7 @@ * @return Instance of element * @throws Exception */ - private MetaDataEntity loadLuceneElement(Document document) - throws Exception { + private MetaDataEntity loadLuceneElement(Document document) { MetaDataEntity element = new MetaDataEntity(); element.setUuid(document.get(KEY_UUID)); element.setName(document.get(KEY_NAME)); @@ -295,14 +447,14 @@ List<Fieldable> fields = document.getFields(); for (Fieldable fieldable : fields) { - + if (fieldable.name().startsWith(KEY_DESCRIPTOR)) { String field = fieldable.name().replace(KEY_DESCRIPTOR + ".", ""); String value = fieldable.stringValue(); descriptors.put(field, value); } - + if (fieldable.name().startsWith(KEY_ATTACHMENT)) { String field = fieldable.name().replace(KEY_ATTACHMENT + ".", ""); @@ -310,26 +462,17 @@ attachments.put(field, value); } } - + element.setDescriptors(descriptors); element.setAttachments(attachments); - String parentId = document.get(KEY_PARENTDATA_UUID); - String parentVersion = document.get(KEY_PARENTDATA_VERSION); - if (parentId != null && parentVersion != null) { - MetaDataEntity parentData = getElement(parentId, new Version( - parentVersion)); - element.setParentData(parentData); - } + element.setParentDataUuid(document.get(KEY_PARENTDATA_UUID)); + element.setParentDataVersion(document.get(KEY_PARENTDATA_VERSION)); - parentId = document.get(KEY_PARENTVERSION_UUID); - parentVersion = document.get(KEY_PARENTVERSION_VERSION); - if (parentId != null && parentVersion != null) { - MetaDataEntity parentByVersion = getElement(document - .get(KEY_PARENTVERSION_UUID), new Version(document - .get(KEY_PARENTVERSION_VERSION))); - element.setParentVersion(parentByVersion); - } + element.setParentVersionUuid(document.get(KEY_PARENTVERSION_UUID)); + element + .setParentVersionVersion(document + .get(KEY_PARENTVERSION_VERSION)); return element; } @@ -346,7 +489,8 @@ * @return * @throws Exception */ - private Document saveLuceneElement(MetaDataEntity element, List<Reader> readers) throws Exception { + private Document saveLuceneElement(MetaDataEntity element, + List<Reader> readers) { Document document = new Document(); document.add(simpleField(KEY_UUID, element.getUuid())); @@ -372,29 +516,27 @@ document.add(simpleField(key, value)); } - if (element.getParentData() != null) { + if (element.getParentDataUuid() != null) { document.add(simpleField(KEY_PARENTDATA_UUID, element - .getParentData().getUuid())); + .getParentDataUuid())); + } + if (element.getParentDataVersion() != null) { document.add(simpleField(KEY_PARENTDATA_VERSION, element - .getParentData().getVersion().toString())); + .getParentDataVersion())); } - if (element.getParentVersion() != null) { + if (element.getParentVersionUuid() != null) { document.add(simpleField(KEY_PARENTVERSION_UUID, element - .getParentVersion().getUuid())); + .getParentVersionUuid())); + } + if (element.getParentVersionVersion() != null) { document.add(simpleField(KEY_PARENTVERSION_VERSION, element - .getParentVersion().getVersion().toString())); + .getParentVersionVersion())); } - StringBuffer buf = new StringBuffer(); for (Reader reader : readers) { - for(int c = reader.read(); c != -1; c = reader.read()) { - buf.append((char)c); - } + document.add(new Field(KEY_SEARCHABLE_CONTENT, reader)); } - - Reader reader = new StringReader(buf.toString()); - document.add(new Field(KEY_SEARCHABLE_CONTENT, reader)); - + return document; } Added: trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/SearcherPool.java =================================================================== --- trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/SearcherPool.java (rev 0) +++ trunk/simexplorer-si-storage/src/java/fr/cemagref/simexplorer/is/storage/database/lucene/SearcherPool.java 2007-12-17 10:57:23 UTC (rev 66) @@ -0,0 +1,131 @@ +package fr.cemagref.simexplorer.is.storage.database.lucene; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.lucene.search.Searcher; + +/** + * Pool of searchers, being randomly accessed<br> + * Manage life cycle of searchers + * + * @author landais + * + */ +public class SearcherPool { + + /** + * Searcher record + * + * @author landais + * + */ + private class SearcherInfo { + /** + * Numbre of queries using this searcher + */ + public int count = 0; + /** + * If marked, searcher is not valid anymore<br> + * It should be closed upon last query ended + */ + public boolean marked = false; + } + + /** + * Searchers used by database + */ + Map<Searcher, SearcherInfo> searchers = new HashMap<Searcher, SearcherInfo>(); + + /** + * Called when a searcher will be used for a query + * + * @param searcher + * Searcher used + */ + public void capture(Searcher searcher) { + SearcherInfo searcherInfo = null; + // Check is searcher is already referenced + if (searchers.containsKey(searcher)) { + // Retrieve searcher info + searcherInfo = searchers.get(searcher); + } else { + // Create searcher info otherwise + searcherInfo = new SearcherInfo(); + // Store it + searchers.put(searcher, searcherInfo); + } + // Increment usage + searcherInfo.count = searcherInfo.count + 1; + } + + /** + * Check if searcher should be closed + * + * @param searcher + * Searcher aiming to be closed + * @param searcherInfo + * Searcher info related + * @throws IOException + */ + private void checkClose(Searcher searcher, SearcherInfo searcherInfo) + throws IOException { + // searcher should be marked as invalid and not used anymore + if (searcherInfo.marked && searcherInfo.count == 0) { + // Remove searcher + searchers.remove(searcher); + // Close it + searcher.close(); + } + } + + /** + * Release a searcher from a query + * + * @param searcher + * Searcher released + * + * @throws IOException + */ + public void release(Searcher searcher) throws IOException { + // Retrieve searcher info + SearcherInfo searcherInfo = searchers.get(searcher); + // Decrement usage + searcherInfo.count = searcherInfo.count - 1; + // Check if searcher should be closed + checkClose(searcher, searcherInfo); + } + + /** + * Mark searcher as being invalid anymore + * + * @param searcher + * @throws IOException + */ + public void mark(Searcher searcher) throws IOException { + // Retrieve searcher + SearcherInfo searcherInfo = searchers.get(searcher); + // Mark as invalid + searcherInfo.marked = true; + // Check if searcher should be closed + checkClose(searcher, searcherInfo); + } + + /** + * Check if any reader is opened + * + * @return true if no reader is open + */ + public boolean allClosed() { + for (Entry<Searcher, SearcherInfo> kv : searchers.entrySet()) { + if (kv.getValue().count > 0) { + return false; + } + } + return true; + } + +}