"FeatureExtraction from XML   LibSVM   Java"
Hi All,
First of all, I would like to thank the RapidMiner guys for their great product!
Thanks a lot for the examples, the documentation and, of course, the wizards!
I would also like to thank Michael Wurst for the tutorial on his website (nemoz.org)!
----------------------------
I'm a newbie student and I have an assignment to classify URLs.
I have read a lot of documentation and searched the forums, but I still have 2 problems (RapidMiner version 4.2)...
I created one XML file per URL containing its features, and sorted the files into 2 folders (one per class):
.\train\news\www.news1.de.xml
.\train\news\www.news2.de.xml
.\train\porn\www.porn1.de.xml
.\train\porn\www.porn2.de.xml
Each XML file looks like:
<myXML>
      <title> my title </title>
      <keywords> my keywords </keywords>
      <numberOfPages> 6 </numberOfPages>
</myXML>
----------------------------
1. When I run the process file (below) in RapidMiner with LibSVM, it says:
   "Message: This learning scheme does not have sufficient capabilities for the given data set: polynominal attributes not supported"
    I tried to use the "06_ExtractionAndWordVecotor.xml" example, but it gave me the same error.
2. I tried to load the model using Java, but I cannot understand how to load the features themselves instead of the whole text
   (TextInput instead of SingleTextInput?). The simple example works, but without the features.
I would really appreciate your help !
Thanks a lot for everything  !!
Jorno
---------------------------------------------
RAPID MINER CONFIGURATION FILE
---------------------------------------------
<?xml version="1.0" encoding="windows-1252"?>
<process version="4.4">
  <!-- Training process. The Root chain lists four operators in order: an XPath
       feature extractor, a TF-IDF text vectorizer, a LibSVM learner and a model
       writer. All paths are relative (.\train\...). -->
  <operator name="Root" class="Process" expanded="yes">     
      <parameter key="logverbosity"   value="init"/>
      <parameter key="random_seed"   value="2001"/>
      <parameter key="encoding"   value="SYSTEM"/>
      <!-- Extractor: reads the XML files from the two class folders and evaluates
           the XPath expressions in the "attributes" list on each document. -->
      <operator name="Extractor" class="FeatureExtraction">
          <!-- One entry per folder; the key (news / porn) is presumably used as the
               class label of the documents in that folder (TODO confirm). -->
          <list key="texts">
            <parameter key="news"   value=".\train\news"/>
            <parameter key="porn"   value=".\train\porn"/>
          </list>
          <parameter key="default_content_type"   value=""/>
          <parameter key="default_content_encoding"   value="UTF-8"/>
          <parameter key="default_content_language"   value="english"/>
          <parameter key="use_content_attributes"   value="false"/>
          <parameter key="id_attribute_type"   value="long"/>
          <!-- Attribute name = XPath expression.
               NOTE(review): the '#' prefix on numberOfPages presumably marks that
               attribute as numeric; title and keywords would then be extracted as
               nominal text values, which looks like the source of the
               "polynominal attributes not supported" error reported for LibSVM.
               Confirm against the text plugin documentation.
               NOTE(review): the title expression ends with a trailing space inside
               the attribute value; verify that this is intended. -->
          <list key="attributes">
            <parameter key="title"   value="//*/title/text() "/>
            <parameter key="#numberOfPages"   value="//*/numberOfPages/text()"/>
            <parameter key="keywords"   value="//*/keywords/text()"/>
          </list>
          <list key="namespaces">
          </list>
      </operator>
      <!-- TextInput: builds TF-IDF word vectors from the same two folders.
           output_word_list points at .\train\training_words.txt, the word list
           file that the Java code further below is given as its second argument.
           NOTE(review): extend_exampleset=true presumably appends the word vector
           to the example set produced by the Extractor (TODO confirm). -->
      <operator name="TextInput" class="TextInput" expanded="yes">
          <list key="texts">
            <parameter key="news"   value=".\train\news"/>
            <parameter key="porn"   value=".\train\porn"/>
          </list>
          <parameter key="default_content_type"   value=""/>
          <parameter key="default_content_encoding"   value="UTF-8"/>
          <parameter key="default_content_language"   value="english"/>
          <parameter key="prune_below"   value="-1"/>
          <parameter key="prune_above"   value="-1"/>
          <parameter key="vector_creation"   value="TFIDF"/>
          <parameter key="use_content_attributes"   value="false"/>
          <parameter key="use_given_word_list"   value="false"/>
          <parameter key="return_word_list"   value="true"/>
          <parameter key="output_word_list"   value=".\train\training_words.txt"/>
          <parameter key="id_attribute_type"   value="long"/>
          <list key="namespaces">
          </list>
          <parameter key="create_text_visualizer"   value="true"/>
          <parameter key="on_the_fly_pruning"   value="-1"/>
          <parameter key="extend_exampleset"   value="true"/>
          <!-- Token processing chain: tokenizer, English stopword filter, token
               length filter (min_chars=3) and Porter stemmer. The Java code that
               applies the model adds the same four operators in the same order. -->
          <operator name="StringTokenizer" class="StringTokenizer">
          </operator>
          <operator name="EnglishStopwordFilter" class="EnglishStopwordFilter">
          </operator>
          <operator name="TokenLengthFilter" class="TokenLengthFilter">
              <parameter key="min_chars"   value="3"/>
              <parameter key="max_chars"   value="2147483647"/>
          </operator>
          <operator name="PorterStemmer" class="PorterStemmer">
          </operator>
      </operator>
      <!-- LibSVMLearner: C-SVC with a linear kernel. calculate_confidences is
           false here, while the Java code below prints e.getConfidence(...);
           NOTE(review): check whether meaningful confidences are available. -->
      <operator name="LibSVMLearner" class="LibSVMLearner">
          <parameter key="keep_example_set"   value="false"/>
          <parameter key="svm_type"   value="C-SVC"/>
          <parameter key="kernel_type"   value="linear"/>
          <parameter key="degree"   value="3"/>
          <parameter key="gamma"   value="0.0"/>
          <parameter key="coef0"   value="0.0"/>
          <parameter key="C"   value="0.0"/>
          <parameter key="nu"   value="0.5"/>
          <parameter key="cache_size"   value="80"/>
          <parameter key="epsilon"   value="0.0010"/>
          <parameter key="p"   value="0.1"/>
          <list key="class_weights">
          </list>
          <parameter key="shrinking"   value="true"/>
          <parameter key="calculate_confidences"   value="false"/>
          <parameter key="confidence_for_multiclass"   value="true"/>
      </operator>
      <!-- ModelWriter: stores the model as a binary file at
           .\train\training_model.mod, overwriting an existing file. -->
      <operator name="ModelWriter" class="ModelWriter">
          <parameter key="model_file"   value=".\train\training_model.mod"/>
          <parameter key="overwrite_existing_file"   value="true"/>
          <parameter key="output_type"   value="Binary"/>
      </operator>
  </operator>
</process>
-----------------------------------------------
JAVA CODE
-----------------------------------------------
import java.io.File;
import java.io.IOException;
import com.rapidminer.RapidMiner;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorCreationException;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.OperatorService;
public class RapidMinerTextClassifier
{
   private OperatorChain wvtoolOperator;
   private Operator modelApplier;
   private Model model;
   public RapidMinerTextClassifier(File modelFile, File wordListFile)
         throws IOException, OperatorCreationException, OperatorException
   {
      //System.setProperty(RapidMiner.PROPERTY_RAPIDMINER_HOME, "C:\\Program Files\\Rapid-I\\RapidMiner\\lib"); //  "rapidminer.home"
      //System.setProperty("rapidminer.home", "D:\\Applications\\RapidMiner-4.2");
      System.setProperty("rapidminer.home", "C:\\Program Files\\Rapid-I\\RapidMiner");
      
      String pluginDirString = new File("C:\\Program Files\\Rapid-I\\RapidMiner\\lib\\plugins").getAbsolutePath();
      System.setProperty(RapidMiner.PROPERTY_RAPIDMINER_INIT_PLUGINS_LOCATION, pluginDirString);
      RapidMiner.init(false, false, false, true);
      
      // Create the text input operator and set the path to the word list you stored using Rapid Miner
      // As there is only a single text, we use the SingleTextInput operator
      wvtoolOperator = (OperatorChain) OperatorService.createOperator("SingleTextInput"); // I need TextInput ?????????????
      
      wvtoolOperator.setParameter("input_word_list", wordListFile.getAbsolutePath());
      // Add additional processing steps.
      // Note the setup must be same as the one you used when creating the classification model
      wvtoolOperator.addOperator(OperatorService.createOperator("StringTokenizer"));
      wvtoolOperator.addOperator(OperatorService.createOperator("EnglishStopwordFilter"));
      wvtoolOperator.addOperator(OperatorService.createOperator("TokenLengthFilter"));
      wvtoolOperator.addOperator(OperatorService.createOperator("PorterStemmer"));
      // Create the model applier
      modelApplier = OperatorService.createOperator("ModelApplier");
      // Load the model into a field of the class
      Operator modelLoader = OperatorService.createOperator("ModelLoader");
      modelLoader.setParameter("model_file", modelFile.getAbsolutePath());
      IOContainer container = modelLoader.apply(new IOContainer());
      model = container.get(Model.class);
   }
   public String apply(String text) throws OperatorException
   {
      // Set the text
      wvtoolOperator.setParameter("text", text);      
      //wvtoolOperator.setParameter("title", text);
      //wvtoolOperator.setParameter("keywords", text);
      //wvtoolOperator.setParameter("numberOfPages", int);
      
      
      // Call the text input operator
      IOContainer container = wvtoolOperator.apply(new IOContainer(model));
      // Call the model applier (the model was added already before calling the text input)
      container = modelApplier.apply(container);
      // Obtain the example set from the io container. It contains only a single example with our text in it.
      ExampleSet eset = container.get(ExampleSet.class);
      Example e = eset.iterator().next();
      // Compare the predicted label with the positive label      
      System.out.println(eset.getAttributes().getPredictedLabel().getMapping() + " " + e.getConfidence("porn") + " " + e.getConfidence("news"));
      return eset.getAttributes().getPredictedLabel().getMapping().mapIndex( (int)e.getPredictedLabel() );
   }
   public static void main(String args[]) throws Exception
   {
      
      // Create a text classifier 
      RapidMinerTextClassifier tr = new RapidMinerTextClassifier(
            new File(
                  "C:\\Main\\eclipse\\workspace\\octopus\\RapidMiner\\train\\training_model.mod"),
            new File(
                  "C:\\Main\\eclipse\\workspace\\octopus\\RapidMiner\\train\\training_words.txt"));
      // Call the classifier with texts
      System.out.println("Test1:" + tr.apply("povrai xflick resolution gif"));
      System.out.println("Test2:" + tr.apply("workstation intel switch"));
      System.out.println("Test3:" + tr.apply("sex porn sex povrai xflick resolution gif"));
   }
}