Altair RISE
A program to recognize and reward our most engaged community members
Nominate Yourself Now!
Home
Discussions
Community Q&A
Using 3 GB RAM for Rapidminer
venkat
Hi All,
I am trying to process 143,000 records and am using 3 GB of RAM for RapidMiner. The process is taking too many days to run, even though the input file is only 337 MB.
I integrated MySQL with RapidMiner and loaded the data into MySQL.
My XML is like this:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 6.0.002) with five chained operators:
  Read Database, Nominal to Text, Process Documents from Data,
  Nominal to Numerical, Clustering (k_means).
  Only the cluster model is delivered as a process result.
-->
<process version="6.0.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">

      <!-- Stage 1: pull five columns from table cat45 through the stored connection named "mysql". -->
      <operator activated="true" class="read_database" compatibility="6.0.002" expanded="true" height="60" name="Read Database" width="90" x="45" y="30">
        <parameter key="connection" value="mysql"/>
        <parameter key="query" value="SELECT `id`, `title`, `keywords`, `keyphrases`, `description` FROM `cat45`"/>
        <enumeration key="parameters"/>
      </operator>

      <!-- Stage 2: no parameters are set here, so the operator runs with its defaults. -->
      <!-- NOTE(review): presumably converts the nominal columns to text for the next stage; confirm. -->
      <operator activated="true" class="nominal_to_text" compatibility="6.0.002" expanded="true" height="76" name="Nominal to Text" width="90" x="45" y="165"/>

      <!-- Stage 3: text extension operator; it and its children declare compatibility 5.3.002, not 6.0.002. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Data" width="90" x="179" y="390">
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- Inner chain: Transform Cases, then Filter Stopwords (English), then Generate n-Grams (Terms). -->
          <!-- This subprocess contains no tokenize operator. -->
          <operator activated="true" class="text:transform_cases" compatibility="5.3.002" expanded="true" height="60" name="Transform Cases" width="90" x="112" y="165"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.002" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="246" y="165"/>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.3.002" expanded="true" height="60" name="Generate n-Grams (Terms)" width="90" x="380" y="165"/>
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Stage 4: applied to the example set produced by stage 3; comparison_groups is left empty. -->
      <operator activated="true" class="nominal_to_numerical" compatibility="6.0.002" expanded="true" height="94" name="Nominal to Numerical" width="90" x="380" y="345">
        <list key="comparison_groups"/>
      </operator>

      <!-- Stage 5: k-means with k = 4 and measure_types set to MixedMeasures. -->
      <operator activated="true" class="k_means" compatibility="6.0.002" expanded="true" height="76" name="Clustering" width="90" x="581" y="120">
        <parameter key="k" value="4"/>
        <parameter key="measure_types" value="MixedMeasures"/>
      </operator>

      <!-- Wiring: the five stages run in a straight line; the cluster model goes to result 1. -->
      <connect from_op="Read Database" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Nominal to Numerical" to_port="example set input"/>
      <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Clustering" to_port="example set"/>
      <connect from_op="Clustering" from_port="cluster model" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Your help is very much appreciated.
Thanks in Advance,
Venkat
Find more posts tagged with
AI Studio
Accepted answers
All comments
fras
Could you provide the name of the operator where the process starts and never returns?
Perhaps you could reduce the size of your SELECT statement by using only "title"? If this works, you
really need more RAM.
Why do you need the "Nominal to Numerical" operator if TF-IDF delivers numerical values for all tokens found?
And last but not least: why do you not apply the "Tokenize" operator inside the "Process Documents from Data" operator?
You should start with tokenizing first and if this works you may add further operators like Generate-N-Grams and so on.
Quick Links
All Categories
Recent Discussions
Activity
Unanswered
日本語 (Japanese)
한국어(Korean)
Groups