hello,
mikoo
New Altair Community Member
I'm currently doing sentiment analysis using RapidMiner, and the whole process runs successfully. My question is about the result: I want to produce sentiment counts for certain words in my data. Let's say I want to know how many times the word "location" appears, and how many of those occurrences have positive and negative sentiment, broken down by another factor, say "university". What I mean is that I have an attribute university, and for each university I want to know the sentiment counts for the word location. Is this possible? When I use the visualisation and ask for the count for location and also for the word cost, it gives me the same total for both words, which doesn't seem valid. Can anyone help me figure this out, or show me how to properly plot the visualisation to get the right total counts?
Tagged:
0
Answers
-
Hi mikoo,
Yes, this should be possible. Could you please share the process XML with us so we can take a closer look at the process?
Have you tried using the Aggregate operator and grouping by the university?
Thank you & best regards
0 -
I have tried using the Aggregate operator, but it doesn't output the result that I want. If you take a look at the file I attached here, what I want is, for each university in the data, to know how many positive, negative and neutral values there are for a certain word, like facility, for University Malaya. Is this possible?1
-
Could you please share the dataset as well? Thank you!0
-
-
Hi, is this the result you want to see?
<?xml version="1.0" encoding="UTF-8"?><process version="10.3.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="10.3.000" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="operator_toolbox:caching_subprocess" compatibility="2.17.000" expanded="true" height="103" name="Subprocess (Caching)" width="90" x="179" y="34">
<parameter key="caching_strategy" value="auto"/>
<parameter key="cache_uuid" value="f03bf32c-0bdc-4273-903f-04149c6f2f6f"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="10.3.000" expanded="true" height="68" name="Retrieve Balanced dataset" width="90" x="447" y="85">
<parameter key="repository_entry" value="Balanced dataset"/>
</operator>
<operator activated="true" class="subprocess" compatibility="10.3.000" expanded="true" height="82" name="Pre-process data (2)" width="90" x="715" y="85">
<process expanded="true">
<operator activated="true" class="blending:select_attributes" compatibility="10.3.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="45" y="34">
<parameter key="type" value="include attributes"/>
<parameter key="attribute_filter_type" value="a subset"/>
<parameter key="select_attribute" value=""/>
<parameter key="select_subset" value="Location␞Review␞Univeristy Name"/>
<parameter key="also_apply_to_special_attributes_(id,_label..)" value="false"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="10.3.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="179" y="34">
<parameter key="parameter_expression" value=""/>
<parameter key="condition_class" value="custom_filters"/>
<parameter key="invert_filter" value="false"/>
<list key="filters_list">
<parameter key="filters_entry_key" value="Univeristy Name.is_not_missing."/>
<parameter key="filters_entry_key" value="Review.is_not_missing."/>
<parameter key="filters_entry_key" value="Location.is_not_missing."/>
</list>
<parameter key="filters_logic_and" value="true"/>
<parameter key="filters_check_metadata" value="true"/>
</operator>
<operator activated="true" class="operator_toolbox:extract_sentiment" compatibility="2.17.000" expanded="true" height="103" name="Extract Sentiment (2)" width="90" x="313" y="34">
<parameter key="model" value="vader"/>
<parameter key="text_attribute" value="Review"/>
<parameter key="show_advanced_output" value="false"/>
<parameter key="use_default_tokenization_regex" value="true"/>
<list key="additional_words"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="10.3.000" expanded="true" height="82" name="Nominal to Text (3)" width="90" x="447" y="34">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<connect from_port="in 1" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
<connect from_op="Filter Examples (2)" from_port="example set output" to_op="Extract Sentiment (2)" to_port="exa"/>
<connect from_op="Extract Sentiment (2)" from_port="exa" to_op="Nominal to Text (3)" to_port="example set input"/>
<connect from_op="Nominal to Text (3)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="blending:generate_columns" compatibility="10.3.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="849" y="85">
<list key="function_descriptions">
<parameter key="Sentiment" value="if([Score]>0,"positive",if([Score]<0,"negative","neutral"))"/>
</list>
<parameter key="keep_all_columns" value="true"/>
</operator>
<operator activated="true" class="blending:set_role" compatibility="10.3.000" expanded="true" height="82" name="Set Role (3)" width="90" x="1117" y="85">
<list key="set_roles">
<parameter key="Sentiment" value="label"/>
</list>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="10.3.000" expanded="true" height="82" name="Nominal to Text (4)" width="90" x="1251" y="85">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="blending:set_role" compatibility="10.3.000" expanded="true" height="82" name="Set Role (4)" width="90" x="1385" y="85">
<list key="set_roles">
<parameter key="Univeristy Name" value="id"/>
</list>
</operator>
<operator activated="true" class="operator_toolbox:smote" compatibility="2.17.000" expanded="true" height="82" name="SMOTE Upsampling (2)" width="90" x="1519" y="85">
<parameter key="number_of_neighbours" value="5"/>
<parameter key="normalize" value="true"/>
<parameter key="equalize_classes" value="true"/>
<parameter key="upsampling_size" value="1000"/>
<parameter key="auto_detect_minority_class" value="true"/>
<parameter key="round_integers" value="true"/>
<parameter key="nominal_change_rate" value="0.5"/>
<parameter key="append_to_original" value="true"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="10.0.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="1653" y="85">
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="10.0.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="10.0.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="10.0.000" expanded="true" height="68" name="Filter Stopwords (English) (2)" width="90" x="380" y="34"/>
<connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
<connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
<connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (English) (2)" to_port="document"/>
<connect from_op="Filter Stopwords (English) (2)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Balanced dataset" from_port="output" to_op="Pre-process data (2)" to_port="in 1"/>
<connect from_op="Pre-process data (2)" from_port="out 1" to_op="Generate Attributes (2)" to_port="table input"/>
<connect from_op="Generate Attributes (2)" from_port="table output" to_op="Set Role (3)" to_port="example set input"/>
<connect from_op="Set Role (3)" from_port="example set output" to_op="Nominal to Text (4)" to_port="example set input"/>
<connect from_op="Nominal to Text (4)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
<connect from_op="Set Role (4)" from_port="example set output" to_op="SMOTE Upsampling (2)" to_port="exa"/>
<connect from_op="SMOTE Upsampling (2)" from_port="ups" to_op="Process Documents from Data (2)" to_port="example set"/>
<connect from_op="Process Documents from Data (2)" from_port="example set" to_port="out 1"/>
<connect from_op="Process Documents from Data (2)" from_port="word list" to_port="out 2"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
</process>
</operator>
<operator activated="true" class="declare_missing_value" compatibility="10.3.000" expanded="true" height="82" name="Declare Missing Value" width="90" x="313" y="34">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="mode" value="numeric"/>
<parameter key="numeric_value" value="0.0"/>
<parameter key="expression_value" value=""/>
</operator>
<operator activated="true" class="aggregate" compatibility="10.3.000" expanded="true" height="82" name="Aggregate" width="90" x="447" y="34">
<parameter key="use_default_aggregation" value="true"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default_aggregation_function" value="count (ignoring missings)"/>
<list key="aggregation_attributes"/>
<parameter key="group_by_attributes" value="|Univeristy Name|Sentiment"/>
<parameter key="count_all_combinations" value="false"/>
<parameter key="only_distinct" value="false"/>
<parameter key="ignore_missings" value="true"/>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="10.3.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="581" y="34">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="replace_what" value="countWithOutMissings\((.*?)\)"/>
<parameter key="replace_by" value="$1"/>
</operator>
<connect from_op="Subprocess (Caching)" from_port="out 1" to_op="Declare Missing Value" to_port="example set input"/>
<connect from_op="Subprocess (Caching)" from_port="out 2" to_port="result 1"/>
<connect from_op="Declare Missing Value" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
<connect from_op="Aggregate" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
<connect from_op="Rename by Replacing" from_port="example set output" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>0 -
Hello, sorry for asking, but is it possible for me to have the .rmp file?0
-
Sure, here you go:
The only changes I made were adding a Declare Missing Value operator for 0, and then aggregating per university and sentiment using the aggregation function "count (ignoring missings)".0 -
Alright, thank you so much for your help.1