Sentiment based analysis by Aylien

sascha_tschin
sascha_tschin New Altair Community Member
edited November 5 in Altair RapidMiner

Hi community! I am new to RapidMiner and have never used it before, not even in school. However, for my thesis, I would like to use Aylien's aspect-based sentiment analysis to build a text-analysis process in RapidMiner for analyzing customer reviews. I read this blog post by Aylien:

http://blog.aylien.com/building-a-text-analysis-process-for-customer-reviews-in-rapidminer/

 

It helped me a lot in understanding the program and what you can do with it. I am 80-90% of the way there; I only have some problems with the set-up of my CSV file (I think), which affect the tables of the correlation matrix (see the pictures).

 

correlation matrix.jpg, Excel.jpg

 

My tables don't match the ones shown in the blog post.

Could someone pinpoint what the problem might be? Again, I don't have a lot of knowledge about the program yet; maybe it's something small, or I am just too stupid to figure it out. Either way, it would be nice to receive some help :). Hopefully this all makes sense, haha.

 

 XML file

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process definition (process version 7.6.002).

  Data flow, as wired by the <connect> elements at the end of the inner process:
    Read CSV => Analyze Aspect-Based Sentiment => Multiply
    Multiply output 1 => Process Documents from Data
        example set => Generate ID (2) => Join (left)
        word list   => WordList to Data => Multiply (2)
            output 1 => Split                          => result 5
            output 2 => Filter Examples     => Sort     => result 1
            output 3 => Filter Examples (2) => Sort (2) => result 4
    Multiply output 2 => Process Documents from Data (2) => Generate ID => Join (right)
    Join => Correlation Matrix (example set => result 2, matrix => result 3)
-->
<process version="7.6.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.002" expanded="true" name="Process">
    <process expanded="true">

      <!-- Input: csv_file is an absolute Windows path, so the process only runs where that file exists.
           NOTE(review): data_set_meta_data_information is empty, so column names and types are not
           pinned here; presumably they are guessed from the file at read time. Verify. -->
      <operator activated="true" class="read_csv" compatibility="7.6.002" expanded="true" height="68" name="Read CSV" width="90" x="45" y="85">
        <parameter key="csv_file" value="C:\Users\Sascha\Documents\Rapidminder\18 jan\test 1.csv"/>
        <list key="annotations"/>
        <list key="data_set_meta_data_information"/>
      </operator>

      <!-- Aylien extension operator; configured to read the attribute "Review" with domain "hotels".
           NOTE(review): the CSV presumably has to expose a column named exactly "Review". Confirm
           against the file header. -->
      <operator activated="true" class="com.aylien.textapi.rapidminer:aylien_absa" compatibility="0.2.000" expanded="true" height="68" name="Analyze Aspect-Based Sentiment" width="90" x="179" y="85">
        <parameter key="connection" value="Aylien Text Analysis Connection"/>
        <parameter key="input_attribute" value="Review"/>
        <parameter key="domain" value="hotels"/>
      </operator>

      <!-- Feeds the same input to both Process Documents operators (see the connects below). -->
      <operator activated="true" class="multiply" compatibility="7.6.002" expanded="true" height="103" name="Multiply" width="90" x="313" y="85"/>

      <!-- Second text branch: "Term Occurrences" vectors; specify_weights lists only "aspects".
           NOTE(review): the other Process Documents operator also lists only "aspects". If this
           branch was meant to work on a different attribute, that is not configured here. Confirm. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="7.5.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="581" y="187">
        <parameter key="vector_creation" value="Term Occurrences"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="data_management" value="memory-optimized"/>
        <parameter key="select_attributes_and_weights" value="true"/>
        <list key="specify_weights">
          <parameter key="aspects" value="1.0"/>
        </list>
        <process expanded="true">
          <!-- Per-document chain: tokenize, transform cases, filter by content, filter by length,
               filter English stopwords. -->
          <operator activated="true" class="text:tokenize" compatibility="7.5.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
            <parameter key="mode" value="linguistic tokens"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="7.5.000" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34"/>
          <operator activated="true" class="text:filter_tokens_by_content" compatibility="7.5.000" expanded="true" height="68" name="Filter Tokens (by Content)" width="90" x="313" y="85">
            <parameter key="string" value="[A-Za-z]*"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="7.5.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="447" y="85"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="7.5.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="581" y="85"/>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Tokens (by Content)" to_port="document"/>
          <connect from_op="Filter Tokens (by Content)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- First text branch: "Binary Term Occurrences" vectors over "aspects"; its word list output
           is what feeds WordList to Data. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="7.5.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="34">
        <parameter key="vector_creation" value="Binary Term Occurrences"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="data_management" value="memory-optimized"/>
        <parameter key="select_attributes_and_weights" value="true"/>
        <list key="specify_weights">
          <parameter key="aspects" value="1.0"/>
        </list>
        <process expanded="true">
          <!-- NOTE(review): mode is "specify characters" but no "characters" parameter is set, so
               presumably the operator's default separator set applies. Confirm that it matches how
               the values in "aspects" are delimited. -->
          <operator activated="true" class="text:tokenize" compatibility="7.5.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34">
            <parameter key="mode" value="specify characters"/>
          </operator>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Word-list branch: the word list becomes an example set and is fanned out three ways. -->
      <operator activated="true" class="text:wordlist_to_data" compatibility="7.5.000" expanded="true" height="82" name="WordList to Data" width="90" x="581" y="34"/>
      <operator activated="true" class="multiply" compatibility="7.6.002" expanded="true" height="124" name="Multiply (2)" width="90" x="715" y="34"/>

      <!-- NOTE(review): condition_class is "all" and filters_list is empty on both Filter Examples
           operators; presumably every row passes, which would make them no-ops. Confirm that a
           filter condition was not lost. -->
      <operator activated="true" class="filter_examples" compatibility="7.6.002" expanded="true" height="103" name="Filter Examples (2)" width="90" x="849" y="340">
        <parameter key="condition_class" value="all"/>
        <list key="filters_list"/>
      </operator>
      <!-- Sorts by the "word" attribute, decreasing. -->
      <operator activated="true" class="sort" compatibility="7.6.002" expanded="true" height="82" name="Sort (2)" width="90" x="1050" y="544">
        <parameter key="attribute_name" value="word"/>
        <parameter key="sorting_direction" value="decreasing"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="7.6.002" expanded="true" height="103" name="Filter Examples" width="90" x="849" y="136">
        <parameter key="condition_class" value="all"/>
        <list key="filters_list"/>
      </operator>
      <!-- Same sort settings as Sort (2): attribute "word", decreasing. -->
      <operator activated="true" class="sort" compatibility="7.6.002" expanded="true" height="82" name="Sort" width="90" x="1050" y="289">
        <parameter key="attribute_name" value="word"/>
        <parameter key="sorting_direction" value="decreasing"/>
      </operator>

      <!-- Splits on ":" with attribute selection set to the single attribute "word".
           NOTE(review): invert_selection is "true", which presumably applies the split to every
           attribute except "word". Confirm this is intended. -->
      <operator activated="true" class="split" compatibility="7.6.002" expanded="true" height="82" name="Split" width="90" x="1050" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="word"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="split_pattern" value=":"/>
      </operator>

      <!-- Correlation branch: each Process Documents example set passes through its own Generate ID
           before the Join.
           NOTE(review): key_attributes is empty, so the Join presumably matches rows on the
           generated id attribute. Verify. -->
      <operator activated="true" class="generate_id" compatibility="7.6.002" expanded="true" height="82" name="Generate ID" width="90" x="447" y="442"/>
      <operator activated="true" class="generate_id" compatibility="7.6.002" expanded="true" height="82" name="Generate ID (2)" width="90" x="447" y="289"/>
      <operator activated="true" class="join" compatibility="7.6.002" expanded="true" height="82" name="Join" width="90" x="648" y="391">
        <list key="key_attributes"/>
      </operator>
      <operator activated="true" class="correlation_matrix" compatibility="7.6.002" expanded="true" height="103" name="Correlation Matrix" width="90" x="849" y="493"/>

      <!-- Wiring between the operators above and the process result ports. -->
      <connect from_op="Read CSV" from_port="output" to_op="Analyze Aspect-Based Sentiment" to_port="Example Set"/>
      <connect from_op="Analyze Aspect-Based Sentiment" from_port="Example Set" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Process Documents from Data (2)" to_port="example set"/>
      <connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Generate ID (2)" to_port="example set input"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
      <connect from_op="WordList to Data" from_port="example set" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Split" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Sort (2)" to_port="example set input"/>
      <connect from_op="Sort (2)" from_port="example set output" to_port="result 4"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Sort" to_port="example set input"/>
      <connect from_op="Sort" from_port="example set output" to_port="result 1"/>
      <connect from_op="Split" from_port="example set output" to_port="result 5"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Generate ID (2)" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Join" from_port="join" to_op="Correlation Matrix" to_port="example set"/>
      <connect from_op="Correlation Matrix" from_port="example set" to_port="result 2"/>
      <connect from_op="Correlation Matrix" from_port="matrix" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
    </process>
  </operator>
</process>