"apply weka:W-HierarchicalClusterer"

New Altair Community Member

Jan 28, 2013

Updated Nov 5, 2024 by Jocelyn

Hello, friends of the community. I have a question.
I'm working on text mining (clustering).
I pre-processed the text files, created the TF-IDF vector, and filtered the stop words. As the next step I need to apply the "Weka: W-HierarchicalClusterer" algorithm, but I get the following error:
Jan 28, 2013 6:01:32 PM SEVERE: Process failed: W-HierarchicalClusterer caused an error: com.rapidminer.operator.UserError: caused an error: java.lang.ArrayIndexOutOfBoundsException: 10
Jan 28, 2013 6:01:32 PM SEVERE: Here: Process[1] (Process)
subprocess 'Main Process'
+- Process Documents from Files[1] (Process Documents from Files)
subprocess 'Vector Creation'
| +- Transform Cases[6] (Transform Cases)
| +- Tokenize[6] (Tokenize)
| +- Filter stopwords_pronombres_preposiciones[6] (Filter Stopwords (Dictionary))
| +- Filter stopwords_caratula[6] (Filter Stopwords (Dictionary))
| +- Filter Stopwords (English)[6] (Filter Stopwords (English))
| +- Filter Tokens (by Length)[6] (Filter Tokens (by Length))
==> +- W-HierarchicalClusterer[1] (W-HierarchicalClusterer)

My process XML is included below:



<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition (process version 5.2.008).
  Outer flow: "Process Documents from Files" produces an example set that is
  passed to the Weka "W-HierarchicalClusterer" operator; the resulting
  cluster model is delivered to the process port "result 1".
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <!-- Root operator; it wraps the main process below. -->
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="386" width="547">
      <!--
        Document loading step (text extension, compatibility 5.2.004).
        Only "text_directories" is set explicitly; no other parameter elements
        are present on this operator.
      -->
      <operator activated="true" class="text:process_document_from_file" compatibility="5.2.004" expanded="true" height="76" name="Process Documents from Files" width="90" x="112" y="75">
        <!-- Each entry pairs a key (doc1, doc2, doc3) with a local directory path. -->
        <list key="text_directories">
          <parameter key="doc1" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc1"/>
          <parameter key="doc2" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc2"/>
          <parameter key="doc3" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc3"/>
        </list>
        <!--
          Inner subprocess of the document loader. The connect elements
          further down chain the operators in this order:
          Transform Cases, Tokenize, two dictionary stopword filters,
          the English stopword filter, then the token length filter.
        -->
        <process expanded="true" height="415" width="758">
          <operator activated="true" class="text:transform_cases" compatibility="5.2.004" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
          <operator activated="true" class="text:tokenize" compatibility="5.2.004" expanded="true" height="60" name="Tokenize" width="90" x="45" y="120"/>
          <!-- Dictionary stopword filter backed by a user-supplied word list file. -->
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_pronombres_preposiciones" width="90" x="45" y="210">
            <parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_pronombres_preposiciones.txt"/>
          </operator>
          <!-- Second dictionary stopword filter, using a different word list file. -->
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_caratula" width="90" x="45" y="300">
            <parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_caratula.txt"/>
          </operator>
          <!-- NOTE(review): an English stopword filter is applied although the input directories are named "En español"; confirm this is intended. -->
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.2.004" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="179" y="30"/>
          <!-- Token length filter; only min_chars (3) is set explicitly. -->
          <operator activated="true" class="text:filter_by_length" compatibility="5.2.004" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="179" y="120">
            <parameter key="min_chars" value="3"/>
          </operator>
          <!-- Wiring: subprocess input port "document" through the six operators to output port "document 1". -->
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter stopwords_pronombres_preposiciones" to_port="document"/>
          <connect from_op="Filter stopwords_pronombres_preposiciones" from_port="document" to_op="Filter stopwords_caratula" to_port="document"/>
          <connect from_op="Filter stopwords_caratula" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Weka clusterer (compatibility 5.1.001, which differs from the 5.2.x
        values on the other operators). The element has no parameter children.
        NOTE(review): presumably the operator then runs with its default
        settings; verify against the operator documentation.
      -->
      <operator activated="true" class="weka:W-HierarchicalClusterer" compatibility="5.1.001" expanded="true" height="76" name="W-HierarchicalClusterer" width="90" x="309" y="173"/>
      <!-- Wiring: the document loader's "example set" output feeds the clusterer; its "cluster model" goes to "result 1". -->
      <connect from_op="Process Documents from Files" from_port="example set" to_op="W-HierarchicalClusterer" to_port="example set"/>
      <connect from_op="W-HierarchicalClusterer" from_port="cluster model" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Find more posts tagged with

AI Studio

Weka

🎉Community Raffle - Win $25

"apply weka:W-HierarchicalClusterer"

Find more posts tagged with

Quick Links