"Exception: java.lang.NullPointerException while clustering - More help please."
I am new to both data mining and RM, and was able to get started very quickly on a simple text mining problem. I ran into problems once I started clustering. I have almost 3000 text strings to cluster and none are blank. Each has a unique ID and a description. I'm getting the following error...
Exception: java.lang.NullPointerException error.
I tried re-installing the software per a previously reported issue, but that didn't help. I turned on debugging, but the trace is not helping me either. When I click "send bug report", I get the following error...
Cannot connect to BugZilla server. Please check your internet connection and try again.
My internet works fine (I'm able to run "updates and extensions" for example)
Here is my xml...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process definition (version 5.3.013): Read CSV, then Process Documents from Data, then Clustering. -->
<process version="5.3.013">
  <context>
    <input/>
    <!-- Three repository locations are declared for results. -->
    <output>
      <location>//Local Repository/Result 1 Process Document Cluster</location>
      <location>//Local Repository/Result 2 clustering</location>
      <location>//Local Repository/Result 3</location>
    </output>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.013"
            expanded="true"
            name="Process">
    <process expanded="true">
      <!-- Source data: comma separated file read as windows-1252.
           Column 0 is declared as UNIQID and column 1 as EXP1
           (the dotted values presumably encode name.selected.type.role; confirm). -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.3.013"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="179"
                y="210">
        <parameter key="csv_file" value="C:\Users\lzd3rc\Documents\ADM - Project Files\CrowdSourcing\data mining clustingering trustworthiness\innovation network\AQA Miner\rapidminer\GATS Input File 7 rapidminder SMALL.csv"/>
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="UNIQID.true.text.label"/>
          <parameter key="1" value="EXP1.true.text.attribute"/>
        </list>
      </operator>
      <!-- Text processing: word vector creation is switched off and the original
           text is kept; pruning is absolute (below 2, above 9999).
           NOTE(review): with create_word_vector set to false, the example set handed
           to the clustering operator presumably carries no numeric term attributes.
           Confirm whether this is what triggers the reported NullPointerException. -->
      <operator activated="true"
                class="text:process_document_from_data"
                compatibility="5.3.002"
                expanded="true"
                height="76"
                name="Process Documents from Data"
                width="90"
                x="380"
                y="165">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="2"/>
        <parameter key="prune_above_absolute" value="9999"/>
        <list key="specify_weights"/>
        <!-- Token pipeline, in connection order: tokenize, transform cases,
             filter English stopwords, filter tokens by length (2 to 99999 chars),
             generate n-grams with max_length 3. -->
        <process expanded="true">
          <operator activated="true"
                    class="text:tokenize"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Tokenize (2)"
                    width="90"
                    x="45"
                    y="30"/>
          <operator activated="true"
                    class="text:transform_cases"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Transform Cases (2)"
                    width="90"
                    x="179"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_stopwords_english"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Filter Stopwords (2)"
                    width="90"
                    x="313"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_by_length"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Filter Tokens (by Length)"
                    width="90"
                    x="447"
                    y="75">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="99999"/>
          </operator>
          <operator activated="true"
                    class="text:generate_n_grams_terms"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Generate n-Grams (Terms)"
                    width="90"
                    x="581"
                    y="75">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Clustering step: fast_k_means with k = 5 and determine_good_start_values enabled. -->
      <operator activated="true"
                class="fast_k_means"
                compatibility="5.3.013"
                expanded="true"
                height="76"
                name="Clustering"
                width="90"
                x="514"
                y="300">
        <parameter key="k" value="5"/>
        <parameter key="determine_good_start_values" value="true"/>
      </operator>
      <!-- Top-level wiring: the word list goes to result 1 and the cluster model
           to result 2; nothing is connected to result 3. -->
      <connect from_op="Read CSV" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Clustering" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_port="result 1"/>
      <connect from_op="Clustering" from_port="cluster model" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Any help is appreciated. Thanks!!
Exception: java.lang.NullPointerException error.
I tried re-installing the software per a previously reported issue, but that didn't help. I turned on debugging, but the trace is not helping me either. When I click "send bug report", I get the following error...
Cannot connect to BugZilla server. Please check your internet connection and try again.
My internet works fine (I'm able to run "updates and extensions" for example)
Here is my xml...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process definition (version 5.3.013): Read CSV, then Process Documents from Data, then Clustering. -->
<process version="5.3.013">
  <context>
    <input/>
    <!-- Three repository locations are declared for results. -->
    <output>
      <location>//Local Repository/Result 1 Process Document Cluster</location>
      <location>//Local Repository/Result 2 clustering</location>
      <location>//Local Repository/Result 3</location>
    </output>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.013"
            expanded="true"
            name="Process">
    <process expanded="true">
      <!-- Source data: comma separated file read as windows-1252.
           Column 0 is declared as UNIQID and column 1 as EXP1
           (the dotted values presumably encode name.selected.type.role; confirm). -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.3.013"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="179"
                y="210">
        <parameter key="csv_file" value="C:\Users\lzd3rc\Documents\ADM - Project Files\CrowdSourcing\data mining clustingering trustworthiness\innovation network\AQA Miner\rapidminer\GATS Input File 7 rapidminder SMALL.csv"/>
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="UNIQID.true.text.label"/>
          <parameter key="1" value="EXP1.true.text.attribute"/>
        </list>
      </operator>
      <!-- Text processing: word vector creation is switched off and the original
           text is kept; pruning is absolute (below 2, above 9999).
           NOTE(review): with create_word_vector set to false, the example set handed
           to the clustering operator presumably carries no numeric term attributes.
           Confirm whether this is what triggers the reported NullPointerException. -->
      <operator activated="true"
                class="text:process_document_from_data"
                compatibility="5.3.002"
                expanded="true"
                height="76"
                name="Process Documents from Data"
                width="90"
                x="380"
                y="165">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="2"/>
        <parameter key="prune_above_absolute" value="9999"/>
        <list key="specify_weights"/>
        <!-- Token pipeline, in connection order: tokenize, transform cases,
             filter English stopwords, filter tokens by length (2 to 99999 chars),
             generate n-grams with max_length 3. -->
        <process expanded="true">
          <operator activated="true"
                    class="text:tokenize"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Tokenize (2)"
                    width="90"
                    x="45"
                    y="30"/>
          <operator activated="true"
                    class="text:transform_cases"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Transform Cases (2)"
                    width="90"
                    x="179"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_stopwords_english"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Filter Stopwords (2)"
                    width="90"
                    x="313"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_by_length"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Filter Tokens (by Length)"
                    width="90"
                    x="447"
                    y="75">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="99999"/>
          </operator>
          <operator activated="true"
                    class="text:generate_n_grams_terms"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Generate n-Grams (Terms)"
                    width="90"
                    x="581"
                    y="75">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Clustering step: fast_k_means with k = 5 and determine_good_start_values enabled. -->
      <operator activated="true"
                class="fast_k_means"
                compatibility="5.3.013"
                expanded="true"
                height="76"
                name="Clustering"
                width="90"
                x="514"
                y="300">
        <parameter key="k" value="5"/>
        <parameter key="determine_good_start_values" value="true"/>
      </operator>
      <!-- Top-level wiring: the word list goes to result 1 and the cluster model
           to result 2; nothing is connected to result 3. -->
      <connect from_op="Read CSV" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Clustering" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_port="result 1"/>
      <connect from_op="Clustering" from_port="cluster model" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Any help is appreciated. Thanks!!