Author: echatellier Date: 2013-06-16 22:22:01 +0200 (Sun, 16 Jun 2013) New Revision: 242 Url: http://forge.codelutin.com/projects/cantharella/repository/revisions/242 Log: fixes #2712: Keep duplicated characters (even 2 last ones) Added: trunk/cantharella.data/src/test/java/nc/ird/cantharella/data/model/search/ trunk/cantharella.data/src/test/java/nc/ird/cantharella/data/model/search/CantharellaAnalyzerTest.java Modified: trunk/cantharella.data/src/main/java/nc/ird/cantharella/data/model/search/CantharellaStemmer.java Modified: trunk/cantharella.data/src/main/java/nc/ird/cantharella/data/model/search/CantharellaStemmer.java =================================================================== --- trunk/cantharella.data/src/main/java/nc/ird/cantharella/data/model/search/CantharellaStemmer.java 2013-06-14 16:19:26 UTC (rev 241) +++ trunk/cantharella.data/src/main/java/nc/ird/cantharella/data/model/search/CantharellaStemmer.java 2013-06-16 20:22:01 UTC (rev 242) @@ -252,8 +252,10 @@ len--; if (s[len - 1] == 'e') len--; + /* XXX chatellier 20130516 disabled for cantharella because + duplicated characters are useful in some refs identifiers if (s[len - 1] == s[len - 2]) - len--; + len--;*/ } return len; } Added: trunk/cantharella.data/src/test/java/nc/ird/cantharella/data/model/search/CantharellaAnalyzerTest.java =================================================================== --- trunk/cantharella.data/src/test/java/nc/ird/cantharella/data/model/search/CantharellaAnalyzerTest.java (rev 0) +++ trunk/cantharella.data/src/test/java/nc/ird/cantharella/data/model/search/CantharellaAnalyzerTest.java 2013-06-16 20:22:01 UTC (rev 242) @@ -0,0 +1,39 @@ +package nc.ird.cantharella.data.model.search; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; +import 
org.junit.Assert; +import org.junit.Test; + +public class CantharellaAnalyzerTest { + + /** + * Test que les caractères dupliqués ne sont pas perdus (supprimés) par l'analyzer. + * @throws IOException + */ + @Test + public void testDuplicated() throws IOException { + Analyzer analyzer = new CantharellaAnalyzer(Version.LUCENE_36); + + // without * + StringReader reader = new StringReader("R3044"); + TokenStream stream = analyzer.tokenStream("label", reader); + stream.incrementToken(); + String term = stream.getAttribute(CharTermAttribute.class).toString(); + Assert.assertEquals("r3044", term.toString()); + + // with * + reader = new StringReader("*3044"); + stream = analyzer.tokenStream("label", reader); + stream.incrementToken(); + term = stream.getAttribute(CharTermAttribute.class).toString(); + Assert.assertEquals("3044", term.toString()); + + analyzer.close(); + } +} Property changes on: trunk/cantharella.data/src/test/java/nc/ird/cantharella/data/model/search/CantharellaAnalyzerTest.java ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision HeadURL