Hello, I'm new to RapidMiner and I have a problem with my application.
I want to build a simple two-class text classifier with an SVM, and then develop it further.
I may not know how to build an example set from word vectors...
PS: I bought the "How to extend RapidMiner" paper, but I only need to use existing methods.
I have read "Integrating RapidMiner into your application".
I have also read other posts on the forum (though I may have missed something).
If the "ExampleSetWriter" and "Process Documents from Files" operators can transform word vectors into an example set, why can't I do the same in Java?
I hope for some quick answers about what I should do.
Thanks
import java.io.File;
import java.io.FileWriter;
import edu.udo.cs.wvtool.config.WVTConfiguration;
import edu.udo.cs.wvtool.config.WVTConfigurationFact;
import edu.udo.cs.wvtool.config.WVTConfigurationRule;
import edu.udo.cs.wvtool.generic.output.WordVectorWriter;
import edu.udo.cs.wvtool.generic.stemmer.DummyStemmer;
import edu.udo.cs.wvtool.generic.stemmer.LovinsStemmerWrapper;
import edu.udo.cs.wvtool.generic.stemmer.PorterStemmerWrapper;
import edu.udo.cs.wvtool.generic.stemmer.WVTStemmer;
import edu.udo.cs.wvtool.generic.tokenizer.NGramTokenizer;
import edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer;
import edu.udo.cs.wvtool.generic.vectorcreation.TFIDF;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTFileInputList;
import edu.udo.cs.wvtool.main.WVTWordVector;
import edu.udo.cs.wvtool.main.WVTool;
import edu.udo.cs.wvtool.wordlist.WVTWordList;
import com.rapidminer.*;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.ModelApplier;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.Learner;
import com.rapidminer.operator.learner.functions.kernel.LibSVMLearner;
import com.rapidminer.tools.OperatorService;
import java.io.IOException;
/**
* An example program on how to use the word vector tool.
*
* @author Michael Wurst
* @version $Id$
*
*/
public class Test {

    /**
     * Builds a pruned word list and TF-IDF word vectors for two document
     * directories with WVTool, writes both to files, and then configures a
     * LibSVM learner from RapidMiner and asks it to learn a model.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if any WVTool, RapidMiner or file operation fails
     */
    public static void main(String[] args) throws Exception {
        // set properties to point to plugin directory
        String pluginDirString =
                new File("D:\\Data\\Software\\Instalations\\RapidMiner5\\lib\\plugins").getAbsolutePath();
        System.setProperty(RapidMiner.PROPERTY_RAPIDMINER_INIT_PLUGINS_LOCATION, pluginDirString);

        // RapidMiner has to be initialized before OperatorService can create
        // operators. Without this call createOperator() fails with
        // "No operator description object given for '...LibSVMLearner'".
        // It is done after the plugin property is set so that init() can see it.
        RapidMiner.init();

        // EXAMPLE HOW TO CALL THE PROGRAM FROM JAVA
        // Initialize the WVTool
        WVTool wvt = new WVTool(false);

        // Initialize the configuration
        WVTConfiguration config = new WVTConfiguration();
        final WVTStemmer dummyStemmer = new DummyStemmer();
        final WVTStemmer porterStemmer = new PorterStemmerWrapper();
        // mine->
        // final WVTTokenizer pop = new NGramTokenizer(2,fdsa);
        // <-mine, did I set the same wvtokenizer correctly?
        config.setConfigurationRule(WVTConfiguration.STEP_STEMMER, new WVTConfigurationRule() {
            public Object getMatchingComponent(WVTDocumentInfo d) {
                if (d.getContentLanguage().equals("english")) {
                    return porterStemmer;
                } else {
                    return dummyStemmer;
                }
            }
        });
        // NOTE(review): this sets a rule for the same STEP_STEMMER step again;
        // presumably it replaces the language-dependent rule above - confirm
        // which of the two is intended.
        WVTStemmer stemmer = new LovinsStemmerWrapper();
        config.setConfigurationRule(WVTConfiguration.STEP_STEMMER, new WVTConfigurationFact(stemmer));

        // Initialize the input list with two classes
        WVTFileInputList list = new WVTFileInputList(2);
        // Add entries
        list.addEntry(new WVTDocumentInfo("D:/CrawOut/txt_sentoken/pos", "txt", "", "english", 0));
        list.addEntry(new WVTDocumentInfo("D:/CrawOut/txt_sentoken/neg", "txt", "", "english", 1));

        // Generate the word list
        WVTWordList wordList = wvt.createWordList(list, config);
        // Prune the word list
        wordList.pruneByFrequency(2, 5);

        // Alternative I: read an already created word list from a file
        // WVTWordList wordList2 =
        // new WVTWordList(new FileReader("/home/wurst/tmp/wordlisttest.txt"));
        // Alternative II: Use predefined dimensions
        // List dimensions = new Vector();
        // dimensions.add("atheist");
        // dimensions.add("christian");
        // wordList =
        // wvt.createWordList(list, config, dimensions, false);

        // Store the word list in a file; the writer is closed even if storing fails
        FileWriter wordListFile = new FileWriter("d:/CrawOut/wordlist.txt");
        try {
            wordList.storePlain(wordListFile);
        } finally {
            wordListFile.close();
        }

        // Create the word vectors
        // Set up an output filter (write sparse vectors to a file)
        FileWriter outFile = new FileWriter("d:/CrawOut/wv.txt");
        try {
            WordVectorWriter wvw = new WordVectorWriter(outFile, true);
            try {
                config.setConfigurationRule(WVTConfiguration.STEP_OUTPUT, new WVTConfigurationFact(wvw));
                config.setConfigurationRule(WVTConfiguration.STEP_VECTOR_CREATION,
                        new WVTConfigurationFact(new TFIDF()));

                // Create the vectors
                wvt.createVectors(list, config, wordList);

                // Create and configure the SVM learner
                Operator svm = OperatorService.createOperator(LibSVMLearner.class);
                svm.setParameter(LibSVMLearner.PARAMETER_SVM_TYPE, String.valueOf(LibSVMLearner.SVM_TYPE_C_SVC));
                svm.setParameter(LibSVMLearner.PARAMETER_KERNEL_TYPE, "0"); // linear
                svm.setParameter(LibSVMLearner.PARAMETER_EPSILON, "0.001");
                svm.setParameter(LibSVMLearner.PARAMETER_C, "0.0");
                svm.setParameter(LibSVMLearner.PARAMETER_P, "0.1");
                svm.setParameter(LibSVMLearner.PARAMETER_CONFIDENCE_FOR_MULTICLASS, "true");
                Learner learner = (Learner) svm;

                // NOTE(review): wvw is the WordVectorWriter used as the output
                // step above. Unless that class also implements ExampleSet, this
                // cast throws a ClassCastException at run time; presumably an
                // ExampleSet has to be built from the word vectors instead -
                // confirm against the RapidMiner and WVTool APIs.
                ExampleSet exampleSets = (ExampleSet) wvw;
                Model model = learner.learn(exampleSets);
            } finally {
                // Close the vector writer even if vector creation or learning fails
                wvw.close();
            }
        } finally {
            // Close the output file
            outFile.close();
        }

        // Just for demonstration: Create a vector from a String
        WVTWordVector q = wvt.createVector("cmu harvard net", wordList);
    }
}
/*import edu.udo.*;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTFileInputList;
public class Test {
public static void main(String[] args){
WVTFileInputList list = new WVTFileInputList(2);
list.addEntry(
new WVTDocumentInfo("D:/CrawOut/txt_sentoken/pos",
"txt","","english",0));
list.addEntry(
new WVTDocumentInfo("D:/CrawOut/txt_sentoken/neg",
"txt","","english",1));
list.
}
}
*/
I get this message on the console:
Exception in thread "main" com.rapidminer.operator.OperatorCreationException: No operator description object given for 'com.rapidminer.operator.learner.functions.kernel.LibSVMLearner'
at com.rapidminer.tools.OperatorService.createOperator(OperatorService.java:564)
at Test.main(Test.java:137)