"I need help (java RM)"

platanas20
platanas20 New Altair Community Member
edited November 5 in Community Q&A
Hi to everyone,
I use NetBeans to build a RapidMiner process in Java. My problem is that the same process runs fine in RapidMiner, but in NetBeans something goes wrong and I don't know what it is.
My XML process in RapidMiner:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: read one text document, turn it into an example set,
     read a stored model and apply that model to the example set. -->
<process version="5.1.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.008" expanded="true" name="Process">
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true" height="494" width="547">
      <!-- Read Document: reads the text file named by "file" as UTF-8. -->
      <operator activated="true" class="text:read_document" compatibility="5.1.002" expanded="true" height="60" name="Read Document" width="90" x="45" y="210">
        <parameter key="file" value="H:\Epifaneia Ergasias\RapidMiner\yes.txt"/>
        <parameter key="encoding" value="UTF-8"/>
      </operator>
      <!-- Read Model: reads the model file named by "model_file".
           NOTE(review): no macro is declared in the <macros/> element above, so %{a}
           is presumably one of RapidMiner's predefined macros; confirm it expands to
           the intended file name. -->
      <operator activated="true" class="read_model" compatibility="5.1.008" expanded="true" height="60" name="Read Model" width="90" x="246" y="165">
        <parameter key="model_file" value="C:\Users\platanas\Desktop\Rapid Miner Java\Training\Training_%{a}.mode"/>
      </operator>
      <!-- Process Documents: its inner process runs
           Tokenize, then Filter Stopwords (dictionary file), then Transform Cases. -->
      <operator activated="true" class="text:process_documents" compatibility="5.1.002" expanded="true" height="94" name="Process Documents" width="90" x="179" y="345">
        <parameter key="parallelize_vector_creation" value="true"/>
        <process expanded="true" height="519" width="561">
          <operator activated="true" class="text:tokenize" compatibility="5.1.002" expanded="true" height="60" name="Tokenize (2)" width="90" x="45" y="30"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.1.002" expanded="true" height="60" name="Filter Stopwords (2)" width="90" x="246" y="30">
            <parameter key="file" value="H:\Epifaneia Ergasias\RapidMiner\STOPWORDS.txt"/>
            <parameter key="encoding" value="UTF-8"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="5.1.002" expanded="true" height="60" name="Transform Cases" width="90" x="380" y="30"/>
          <!-- Inner wiring: subprocess source "document" through the three operators
               to the subprocess sink "document 1". -->
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Apply Model: takes the model from Read Model and the example set from
           Process Documents. -->
      <operator activated="true" class="apply_model" compatibility="5.1.008" expanded="true" height="76" name="Apply Model" width="90" x="447" y="300">
        <list key="application_parameters"/>
      </operator>
      <!-- Outer wiring: the labelled data goes to "result 1", the model to "result 2". -->
      <connect from_op="Read Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Read Model" from_port="output" to_op="Apply Model" to_port="model"/>
      <connect from_op="Process Documents" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

My code in NetBeans is:
import com.rapidminer.Process;
import com.rapidminer.RapidMiner;
import com.rapidminer.RapidMiner.ExecutionMode;
import com.rapidminer.operator.ExecutionUnit;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.ModelApplier;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.text.io.DocumentLoader;
import com.rapidminer.operator.text.io.DocumentTextInputOperator;
import com.rapidminer.operator.io.ModelLoader;
import com.rapidminer.operator.text.io.tokenizer.StringTokenizerOperator;
import com.rapidminer.operator.text.io.wordfilter.StopwordFilterOperator;
import com.rapidminer.operator.text.io.transformer.CaseTransformationOperator;
import com.rapidminer.tools.OperatorService;

import java.io.File;



public class Clustering {

    /**
* Connect the output-port <code>fromPortName</code> from Operator
* <code>from</code> with the input-port <code>toPortName</code> of Operator
* <code>to</code>.
*/
      private static void connect(Operator from, String fromPortName,
Operator to, String toPortName) {
from.getOutputPorts().getPortByName(fromPortName).connectTo(
to.getInputPorts().getPortByName(toPortName));
}

/**
* Connect the output-port <code>fromPortName</code> from Subprocess
* <code>from</code> with the input-port <code>toPortName</code> of Operator
* <code>to</code>.
*/
private static void connect(ExecutionUnit from, String fromPortName,
Operator to, String toPortName) {
from.getInnerSources().getPortByName(fromPortName).connectTo(
to.getInputPorts().getPortByName(toPortName));
}

/**
* Connect the output-port <code>fromPortName</code> from Operator
* <code>from</code> with the input-port <code>toPortName</code> of
* Subprocess <code>to</code>.
*/
private static void connect(Operator from, String fromPortName,
ExecutionUnit to, String toPortName) {
from.getOutputPorts().getPortByName(fromPortName).connectTo(
to.getInnerSinks().getPortByName(toPortName));
}

        //main
public static void main(String[] argv) throws Exception {
                // init rapidminer
RapidMiner.setExecutionMode(ExecutionMode.COMMAND_LINE);
RapidMiner.init();

// Create a process
final Process process = new Process();

                // Set the parameters of process
              // process.getRootOperator().setParameter("parallelize_main_process", "true");
                process.getRootOperator().setParameter("encoding", "UTF-8");
// all operators from "left to right"

                //read document
                final DocumentLoader readdocument = OperatorService
.createOperator(DocumentLoader.class);
                // Set the parameters of read document
                readdocument.setParameter("file","H:/Epifaneia Ergasias/RapidMiner/yes.txt");
                readdocument.setParameter("encoding", "UTF-8");

                //process documents
                final  DocumentTextInputOperator processdocuments = OperatorService
.createOperator(DocumentTextInputOperator.class);
                processdocuments.setParameter("parallelize_vector_creation","true");
               

                // read model
                final ModelLoader readmodel = OperatorService
.createOperator(ModelLoader.class);
                // Set the parameters of read model
                readmodel.setParameter("model_file","C:/Users/platanas/Desktop/Rapid Miner Java/Training/Training_%{a}.mode");

                //apply model
                final Operator modelApplier = OperatorService
.createOperator(ModelApplier.class);

                // add operators to the main process and connect them
                process.getRootOperator().getSubprocess(0).addOperator(readdocument);
                process.getRootOperator().getSubprocess(0).addOperator(readmodel);
                process.getRootOperator().getSubprocess(0).addOperator(processdocuments);             
                process.getRootOperator().getSubprocess(0).addOperator(modelApplier);

                connect(readdocument,"output",processdocuments,"documents 1");
                connect(readmodel,"output",modelApplier,"model");
                connect(processdocuments,"example set",modelApplier,"unlabelled data");
                connect(modelApplier,"labelled data",process.getRootOperator().getSubprocess(0),"result 1");
                connect(modelApplier,"model",process.getRootOperator().getSubprocess(0),"result 2");

                //operators  inside the process documents

                final StringTokenizerOperator tokenize=OperatorService
.createOperator(StringTokenizerOperator.class);
                final StopwordFilterOperator stopwords=OperatorService
.createOperator(StopwordFilterOperator.class);
                // Set the parameters of stopwords
                stopwords.setParameter("file","H:/Epifaneia Ergasias/RapidMiner/STOPWORDS.txt");
                stopwords.setParameter("encoding", "UTF-8");

                  final CaseTransformationOperator tranformcases=OperatorService
.createOperator(CaseTransformationOperator.class);

                // add operators to the process documents from files and connect them
                processdocuments.getSubprocess(0).addOperator(tokenize);
                processdocuments.getSubprocess(0).addOperator(stopwords);
                processdocuments.getSubprocess(0).addOperator(tranformcases);

                connect(processdocuments.getSubprocess(0),"document",tokenize,"document");
                connect(tokenize,"document",stopwords,"document");
                connect(stopwords,"document",tranformcases,"document");
                connect(tranformcases,"document",processdocuments.getSubprocess(0), "document 1");
               

                // print process setup
System.out.println(process.getRootOperator().createProcessTree(0));

                  File x=new File ("C://Users//platanas//Desktop//Rapid Miner Java//Clustering//Clustering.rmp");

                process.save(x);

// perform process
//process.run();
                IOContainer ioResult=process.run();
                System.out.println(ioResult.getElementAt(0).toString());

    }

}
And the error is:
Aug 14, 2011 10:25:53 PM com.rapidminer.Process run
INFO: Process starts
Exception in thread "main" java.lang.StringIndexOutOfBoundsException: String index out of range: -1
        at java.lang.String.substring(String.java:1932)
        at java.lang.String.substring(String.java:1905)
        at com.rapidminer.operator.text.io.DocumentLoader.read(DocumentLoader.java:72)
        at com.rapidminer.operator.text.io.DocumentLoader.read(DocumentLoader.java:49)
        at com.rapidminer.operator.io.AbstractReader.doWork(AbstractReader.java:123)
        at com.rapidminer.operator.Operator.execute(Operator.java:831)
        at com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:51)
        at com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:709)
        at com.rapidminer.operator.OperatorChain.doWork(OperatorChain.java:369)
        at com.rapidminer.operator.Operator.execute(Operator.java:831)
        at com.rapidminer.Process.run(Process.java:902)
        at com.rapidminer.Process.run(Process.java:798)
        at com.rapidminer.Process.run(Process.java:793)
        at com.rapidminer.Process.run(Process.java:783)
        at Clustering.main(Clustering.java:137)

Thanks
platanas
Tagged:

Answers

  • Marco_Boeck
    Marco_Boeck New Altair Community Member
    Hi,

    I moved this to the Development forum, as this is a coding problem.

    In regards to your problem I will say what I always say:

    It is way, way easier and a lot less error-prone to design all processes in RapidMiner and then execute them via process.run() in Java. Otherwise, errors like yours may pop up and, frankly speaking, I don't have the time to go over your code.

    See this: http://rapid-i.com/rapidforum/index.php/topic,3862.msg14364.html#msg14364

    Regards,
    Marco