I am trying to figure out how the LDA operator works. I have an Excel data file with 630 rows of text data. I ran it through Process Documents from Data to generate the 'Term Occurrences' word vector, as described in the LDA manual. However, when I feed this into the LDA operator, after a while I simply get an out-of-memory error. I've tried it with 4 GB of RAM and with 16 GB of RAM. What am I doing wrong? Attached is the process XML. Thank you.
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.001">
  <!--
    RapidMiner process definition (version 7.5.001).
    Top-level chain, as wired by the connect elements near the end:
    Read Excel, then Process Documents from Data, then Latent Dirichlet Allocation,
    whose "cluster model" port is delivered to result 1.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="7.5.001"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!--
        Step 1: load the workbook. Only cells A1:A637 are imported.
        first_row_as_names is false, and row 0 carries the "Name" annotation.
      -->
      <operator activated="true"
                class="read_excel"
                compatibility="7.5.001"
                expanded="true"
                height="68"
                name="Read Excel"
                width="90"
                x="112"
                y="85">
        <parameter key="excel_file" value="C:\Users\Pari\OneDrive\ADMIN onedrive\Projects\Valbot\Rel8ed\Valbot RapidMiner-Export-Import\Data\Landscape Text from Blogs.xlsx"/>
        <parameter key="imported_cell_range" value="A1:A637"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <!--
          Metadata for column 0. The value ends in ".true.text.attribute";
          presumably the layout is name.selected.type.role. TODO confirm against the Read Excel docs.
        -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="backyard landscaping ideas oakville ontario backyard landscaping ideas oakville ontario backyard la….true.text.attribute"/>
        </list>
      </operator>

      <!--
        Step 2: build the word vector with vector_creation = "Term Occurrences".
        Pruning is absolute with bounds 50 (below) and 600 (above); presumably these are
        document-count thresholds for keeping a term. TODO confirm against the operator docs.
      -->
      <operator activated="true"
                class="text:process_document_from_data"
                compatibility="7.5.000"
                expanded="true"
                height="82"
                name="Process Documents from Data"
                width="90"
                x="246"
                y="85">
        <parameter key="vector_creation" value="Term Occurrences"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="50"/>
        <parameter key="prune_above_absolute" value="600"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!--
            Per-document pipeline, in connection order:
            Transform Cases, Tokenize, Filter Stopwords (English), Filter Tokens (by Length).
            Only the length filter sets a parameter (min_chars = 3); the others use their defaults.
          -->
          <operator activated="true"
                    class="text:transform_cases"
                    compatibility="7.5.000"
                    expanded="true"
                    height="68"
                    name="Transform Cases"
                    width="90"
                    x="45"
                    y="34"/>
          <operator activated="true"
                    class="text:tokenize"
                    compatibility="7.5.000"
                    expanded="true"
                    height="68"
                    name="Tokenize"
                    width="90"
                    x="179"
                    y="34"/>
          <operator activated="true"
                    class="text:filter_stopwords_english"
                    compatibility="7.5.000"
                    expanded="true"
                    height="68"
                    name="Filter Stopwords (English)"
                    width="90"
                    x="313"
                    y="34"/>
          <operator activated="true"
                    class="text:filter_by_length"
                    compatibility="7.5.000"
                    expanded="true"
                    height="68"
                    name="Filter Tokens (by Length)"
                    width="90"
                    x="447"
                    y="34">
            <parameter key="min_chars" value="3"/>
          </operator>
          <connect from_port="document"
                   to_op="Transform Cases"
                   to_port="document"/>
          <connect from_op="Transform Cases"
                   from_port="document"
                   to_op="Tokenize"
                   to_port="document"/>
          <connect from_op="Tokenize"
                   from_port="document"
                   to_op="Filter Stopwords (English)"
                   to_port="document"/>
          <connect from_op="Filter Stopwords (English)"
                   from_port="document"
                   to_op="Filter Tokens (by Length)"
                   to_port="document"/>
          <connect from_op="Filter Tokens (by Length)"
                   from_port="document"
                   to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!--
        Step 3: topic model from the corpus_linguistics_plugin_LDA extension.
        NOTE(review): no parameter elements are set on this operator, so it runs
        entirely on whatever defaults the extension defines.
      -->
      <operator activated="true"
                class="corpus_linguistics_plugin_LDA:lda_topic_model"
                compatibility="1.1.001"
                expanded="true"
                height="145"
                name="Latent Dirichlet Allocation"
                width="90"
                x="447"
                y="85"/>

      <!-- Top-level wiring between the three operators and the process result port. -->
      <connect from_op="Read Excel"
               from_port="output"
               to_op="Process Documents from Data"
               to_port="example set"/>
      <connect from_op="Process Documents from Data"
               from_port="example set"
               to_op="Latent Dirichlet Allocation"
               to_port="example set of documents as Bag-of-Words vectors with term occurrences"/>
      <connect from_op="Latent Dirichlet Allocation"
               from_port="cluster model"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>