🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

"apply weka:W-HierarchicalClusterer"

User: "MarcosRL"
New Altair Community Member
Updated by Jocelyn
Hello friends of the community, I have a question.
I'm working with text mining - clustering
I pre-processed the text files, created the TF-IDF vector and filtered the stop words. As the next step I need to apply the algorithm "Weka: W-HierarchicalClusterer", but I get the following error:
Jan 28, 2013 6:01:32 PM SEVERE: Process failed: W-HierarchicalClusterer caused an error: com.rapidminer.operator.UserError:  caused an error: java.lang.ArrayIndexOutOfBoundsException: 10
Jan 28, 2013 6:01:32 PM SEVERE: Here:           Process[1] (Process)
          subprocess 'Main Process'
            +- Process Documents from Files[1] (Process Documents from Files)
          subprocess 'Vector Creation'
            |     +- Transform Cases[6] (Transform Cases)
            |     +- Tokenize[6] (Tokenize)
            |     +- Filter stopwords_pronombres_preposiciones[6] (Filter Stopwords (Dictionary))
            |     +- Filter stopwords_caratula[6] (Filter Stopwords (Dictionary))
            |     +- Filter Stopwords (English)[6] (Filter Stopwords (English))
            |     +- Filter Tokens (by Length)[6] (Filter Tokens (by Length))
      ==>   +- W-HierarchicalClusterer[1] (W-HierarchicalClusterer)

I have added my process XML below.


<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process definition (process version 5.2.008).
     Data flow, as wired by the connect elements: "Process Documents from Files" emits an
     example set, which is passed to the Weka "W-HierarchicalClusterer" operator; the resulting
     cluster model is delivered to the process output port "result 1". -->
<process version="5.2.008">
 <!-- Empty process context: no inputs, outputs or macros are declared. -->
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
   <process expanded="true" height="386" width="547">
     <!-- Reads documents from the directories listed under "text_directories" and runs the
          nested sub-process on each document. No vector-creation parameter is set here, so the
          operator's default applies (presumably TF-IDF; confirm against the operator docs). -->
     <operator activated="true" class="text:process_document_from_file" compatibility="5.2.004" expanded="true" height="76" name="Process Documents from Files" width="90" x="112" y="75">
       <!-- Three input directories, keyed doc1, doc2 and doc3. The paths are absolute and
            specific to the author's machine, so they must be adapted before the process can
            run elsewhere. -->
       <list key="text_directories">
         <parameter key="doc1" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc1"/>
         <parameter key="doc2" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc2"/>
         <parameter key="doc3" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc3"/>
       </list>
       <!-- Per-document pipeline. The order is defined by the connect elements further down:
            Transform Cases, Tokenize, two dictionary stopword filters, the English stopword
            filter, and finally the token length filter. -->
       <process expanded="true" height="415" width="758">
         <operator activated="true" class="text:transform_cases" compatibility="5.2.004" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
         <operator activated="true" class="text:tokenize" compatibility="5.2.004" expanded="true" height="60" name="Tokenize" width="90" x="45" y="120"/>
         <!-- Dictionary stopword filter; the word list is loaded from a local text file. -->
         <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_pronombres_preposiciones" width="90" x="45" y="210">
           <parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_pronombres_preposiciones.txt"/>
         </operator>
         <!-- Second dictionary stopword filter, using a different local word list. -->
         <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_caratula" width="90" x="45" y="300">
           <parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_caratula.txt"/>
         </operator>
         <!-- NOTE(review): the input directory path contains "En español"; confirm that an
              English stopword list is intended in addition to the custom dictionaries. -->
         <operator activated="true" class="text:filter_stopwords_english" compatibility="5.2.004" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="179" y="30"/>
         <!-- Sets min_chars to 3; presumably tokens shorter than three characters are
              discarded (confirm against the operator docs). No maximum length is set here. -->
         <operator activated="true" class="text:filter_by_length" compatibility="5.2.004" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="179" y="120">
           <parameter key="min_chars" value="3"/>
         </operator>
         <!-- Wiring: a single linear chain from the sub-process input port "document" to the
              sub-process output port "document 1". -->
         <connect from_port="document" to_op="Transform Cases" to_port="document"/>
         <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
         <connect from_op="Tokenize" from_port="document" to_op="Filter stopwords_pronombres_preposiciones" to_port="document"/>
         <connect from_op="Filter stopwords_pronombres_preposiciones" from_port="document" to_op="Filter stopwords_caratula" to_port="document"/>
         <connect from_op="Filter stopwords_caratula" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
         <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
         <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
         <portSpacing port="source_document" spacing="0"/>
         <portSpacing port="sink_document 1" spacing="0"/>
         <portSpacing port="sink_document 2" spacing="0"/>
       </process>
     </operator>
     <!-- Weka hierarchical clusterer. It has no parameter children, so it runs with the
          operator's defaults. Its compatibility attribute (5.1.001) differs from the text
          operators (5.2.004) and the process (5.2.008).
          NOTE(review): the example set is passed in exactly as produced by the document
          operator; whether the clusterer accepts every attribute in it cannot be determined
          from this file. -->
     <operator activated="true" class="weka:W-HierarchicalClusterer" compatibility="5.1.001" expanded="true" height="76" name="W-HierarchicalClusterer" width="90" x="309" y="173"/>
     <!-- Top-level wiring: example set into the clusterer, cluster model out to "result 1". -->
     <connect from_op="Process Documents from Files" from_port="example set" to_op="W-HierarchicalClusterer" to_port="example set"/>
     <connect from_op="W-HierarchicalClusterer" from_port="cluster model" to_port="result 1"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
   </process>
 </operator>
</process>


Find more posts tagged with