A program to recognize and reward our most engaged community members
package com.rapidminer.operator.text.io.wordfilter;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.Token;
import com.rapidminer.operator.text.io.AbstractTokenProcessor;
import com.rapidminer.parameter.UndefinedParameterError;

import edu.mit.jwi.Dictionary;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.ISynset;
import edu.mit.jwi.item.ISynsetID;
import edu.mit.jwi.item.IWord;
import edu.mit.jwi.item.IWordID;
import edu.mit.jwi.item.POS;
import edu.mit.jwi.item.Pointer;
import edu.mit.jwi.morph.WordnetStemmer;

/**
 * Token processor that enriches a document's token sequence with WordNet
 * lemmas related to each token.
 *
 * <p>For every token, the token text is stemmed as a noun, the first stem is
 * looked up in the WordNet dictionary, and the lemmas of all synsets reachable
 * from the first listed word ID via {@link Pointer#HYPERNYM} are inserted as
 * additional tokens directly before the original token. The added tokens
 * carry the weight of the token they were derived from.
 *
 * <p>NOTE(review): despite the class name ("Hyponym"), the relation that is
 * actually followed is {@code Pointer.HYPERNYM}. The name is kept because it
 * is part of the public interface.
 *
 * <p>The dictionary is opened in the constructor and is not closed anywhere
 * in this class.
 */
public class WordnetHyponymOperator extends AbstractTokenProcessor {

    /** WordNet installation directory; dictionary files are read from its {@code dict} subdirectory. */
    private static final String WORDNET_HOME = "/usr/local/WordNet-3.0/";

    /** Name of the subdirectory of {@link #WORDNET_HOME} that holds the dictionary files. */
    private static final String DICT_SUBDIRECTORY = "dict";

    private final WordnetStemmer stemmer;
    private final IDictionary dict;

    /**
     * Creates the operator, opens the WordNet dictionary and sets up a stemmer
     * that uses it.
     *
     * @param description operator description, passed through to the superclass
     * @throws IllegalStateException if no URL can be built for the dictionary path
     */
    public WordnetHyponymOperator(OperatorDescription description) {
        super(description);

        // File(parent, child) avoids the doubled separator that plain string
        // concatenation produced, because WORDNET_HOME already ends in '/'.
        String path = new File(WORDNET_HOME, DICT_SUBDIRECTORY).getPath();

        URL url;
        try {
            url = new URL("file", null, path);
        } catch (MalformedURLException e) {
            // Fail fast with the cause attached instead of printing the stack
            // trace and handing a null URL to the dictionary.
            throw new IllegalStateException("Cannot build WordNet dictionary URL for path: " + path, e);
        }

        // Construct the dictionary object and open it.
        IDictionary dictionary = new Dictionary(url);
        dictionary.open();

        this.dict = dictionary;
        this.stemmer = new WordnetStemmer(dictionary);
    }

    /**
     * Rebuilds the token sequence of the given document so that every original
     * token is preceded by tokens for the lemmas found through the HYPERNYM
     * relation (if any). The document is modified in place.
     *
     * @param textObject document whose token sequence is to be expanded
     * @return the same {@code textObject} instance, with its token sequence replaced
     * @throws OperatorException declared by the overridden method; not thrown directly here
     */
    @Override
    protected Document doWork(Document textObject) throws OperatorException {
        List<Token> newSequence = new ArrayList<Token>(textObject.getTokenSequence().size());
        for (Token token : textObject.getTokenSequence()) {
            // Related lemmas go first, then the original token itself.
            addHypernymTokens(token, newSequence);
            newSequence.add(token);
        }
        textObject.setTokenSequence(newSequence);
        return textObject;
    }

    /**
     * Appends one token per lemma of every synset related via
     * {@link Pointer#HYPERNYM} to the first listed word ID of the token's
     * first noun stem. Appends nothing if any lookup step yields no result.
     *
     * @param token  source token; supplies the text to look up and the weight for new tokens
     * @param target list that receives the newly created tokens
     */
    private void addHypernymTokens(Token token, List<Token> target) {
        String text = token.getToken();
        // Skip null/blank text rather than passing it to the stemmer.
        // NOTE(review): JWI documents findStems as rejecting such input — confirm
        // for the JWI version on the classpath.
        if (text == null || text.trim().length() == 0) {
            return;
        }

        List<String> stems = stemmer.findStems(text, POS.NOUN);
        if (stems == null || stems.isEmpty()) {
            return;
        }

        IIndexWord idxWord = dict.getIndexWord(stems.get(0), POS.NOUN);
        if (idxWord == null || idxWord.getWordIDs().isEmpty()) {
            return;
        }

        // Only the first word ID listed for the index word is considered.
        IWordID wordID = idxWord.getWordIDs().get(0);
        IWord word = dict.getWord(wordID);
        if (word == null) {
            return;
        }

        // The map has no entry for relations the synset does not participate
        // in, so get() can return null; guard before iterating.
        List<ISynsetID> hypernymIds = word.getSynset().getRelatedMap().get(Pointer.HYPERNYM);
        if (hypernymIds == null) {
            return;
        }

        for (ISynsetID hypernymId : hypernymIds) {
            ISynset hypernym = dict.getSynset(hypernymId);
            if (hypernym == null) {
                continue;
            }
            for (IWord hypernymWord : hypernym.getWords()) {
                target.add(new Token(hypernymWord.getLemma(), token.getWeight()));
            }
        }
    }
}