Sentiment Analysis using an external Dictionary
Hey there,
I just started using RapidMiner for sentiment analysis with an external dictionary (BAWL).
I've built what is (at least in my eyes) a proper process, but it seems it's not. As I said, I'm really new to this and I really don't know what's wrong with it (really looking forward to every hint/explanation!). When I try to run the process, everything seems to work, but after a few seconds it reports that the process has failed.
Anyways, thanks in advance!
Danilo
Anyways, thanks in advance!
Danilo
Find more posts tagged with
Sort by:
1 - 3 of
31
Hi @mschmitz,
thanks for the quick response! I'm sure you've improved my process, but now it says that the model I'm trying to apply needs to work on tokenized documents. I don't get it; as far as I can tell, I'm using the Tokenize operator. Do you have any clue?
Thank you,
Danilo
Hi @DDresen ,
This sounds odd. Can you share the Excel files so I can have a look? I am available at mschmitz at rapidminer.com.
Martin
<!-- Process context: no input bindings, output bindings or macros are declared. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<!-- Dictionary loader: reads the BAWL spreadsheet, assigns the regular role to
     WORD_LOWER, runs Text to Nominal over all attributes and hands the resulting
     example set to port "out 1". -->
<operator name="Subprocess" class="subprocess" activated="true" compatibility="9.6.000" expanded="true" height="82" width="90" x="45" y="391">
  <process expanded="true">
    <!-- Step 1: import columns B to D of sheet 1; the first row supplies the attribute names. -->
    <operator name="Read Excel" class="read_excel" activated="true" compatibility="9.6.000" expanded="true" height="68" width="90" x="45" y="34">
      <parameter key="excel_file" value="/Users/danilodresen/Documents/TH_Koeln/Bachelorarbeit/RapidMiner/Dictionaries/Bawl_reduziert.xls"/>
      <parameter key="sheet_selection" value="sheet number"/>
      <parameter key="sheet_number" value="1"/>
      <!-- NOTE(review): the row bound 10485776 is larger than any Excel sheet can hold;
           presumably it is meant as "read to the end of the sheet" - confirm. -->
      <parameter key="imported_cell_range" value="B1:D10485776"/>
      <parameter key="encoding" value="SYSTEM"/>
      <parameter key="first_row_as_names" value="true"/>
      <list key="annotations"/>
      <parameter key="date_format" value=""/>
      <parameter key="time_zone" value="SYSTEM"/>
      <parameter key="locale" value="English (United States)"/>
      <parameter key="read_all_values_as_polynominal" value="false"/>
      <!-- Declared columns: word and word class as polynominal, emotional mean as real. -->
      <list key="data_set_meta_data_information">
        <parameter key="0" value="WORD_LOWER.true.polynominal.attribute"/>
        <parameter key="1" value="WORD_CLASS.true.polynominal.attribute"/>
        <parameter key="2" value="EMO_MEAN.true.real.attribute"/>
      </list>
      <parameter key="read_not_matching_values_as_missings" value="false"/>
      <parameter key="datamanagement" value="double_array"/>
      <parameter key="data_management" value="auto"/>
    </operator>
    <!-- Step 2: give WORD_LOWER the "regular" role. -->
    <operator name="Set Role" class="set_role" activated="true" compatibility="9.6.000" expanded="true" height="82" width="90" x="179" y="34">
      <parameter key="attribute_name" value="WORD_LOWER"/>
      <parameter key="target_role" value="regular"/>
      <list key="set_additional_roles"/>
    </operator>
    <!-- Step 3: Text to Nominal with the attribute filter set to "all". -->
    <operator name="Text to Nominal" class="text_to_nominal" activated="true" compatibility="9.6.000" expanded="true" height="82" width="90" x="313" y="34">
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="text"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="text"/>
      <parameter key="block_type" value="value_matrix"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
    </operator>
    <!-- Wiring: Read Excel, then Set Role, then Text to Nominal, then subprocess output 1. -->
    <connect from_op="Read Excel" from_port="output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Text to Nominal" to_port="example set input"/>
    <connect from_op="Text to Nominal" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
  </process>
</operator>
<!-- Sentiment model builder: uses WORD_LOWER as the lookup key and EMO_MEAN as the
     score. No negation attribute is configured. -->
<operator name="Dictionary-Based Sentiment (Documents)" class="operator_toolbox:dictionary_sentiment_learner" activated="true" compatibility="2.5.000-SNAPSHOT" expanded="true" height="82" width="90" x="179" y="391">
  <parameter key="value_attribute" value="EMO_MEAN"/>
  <parameter key="key_attribute" value="WORD_LOWER"/>
  <parameter key="negation_attribute" value=""/>
  <parameter key="negation_window_size" value="1"/>
  <parameter key="use_symmetric_negation_window" value="false"/>
</operator>
<!-- Corpus loader: reads two JSON files as documents, flattens them into an example
     set with JSON To Data, converts the rows back into documents and hands the
     collection to port "out 1". -->
<operator activated="true" class="subprocess" compatibility="9.6.000" expanded="true" height="82" name="Subprocess (2)" width="90" x="45" y="85">
  <process expanded="true">
    <operator activated="true" class="text:read_document" compatibility="9.3.001" expanded="true" height="68" name="Read deutsch QS" width="90" x="45" y="34">
      <parameter key="file" value="/Users/danilodresen/Documents/TH_Koeln/Bachelorarbeit/RapidMiner/JSON/deutsch_QS.json"/>
      <parameter key="extract_text_only" value="true"/>
      <parameter key="use_file_extension_as_type" value="true"/>
      <parameter key="content_type" value="txt"/>
      <!-- Pinned to UTF-8, the encoding JSON interchange uses. "SYSTEM" follows the
           JVM default, which can garble umlauts and break dictionary lookups. -->
      <parameter key="encoding" value="UTF-8"/>
    </operator>
    <operator activated="true" class="text:read_document" compatibility="9.3.001" expanded="true" height="68" name="Read franz QS" width="90" x="45" y="136">
      <!-- NOTE(review): the file is named "fanz_qs.json" while the operator is called
           "Read franz QS" - confirm the path is spelled as it is on disk. -->
      <parameter key="file" value="/Users/danilodresen/Documents/TH_Koeln/Bachelorarbeit/RapidMiner/JSON/fanz_qs.json"/>
      <parameter key="extract_text_only" value="true"/>
      <parameter key="use_file_extension_as_type" value="true"/>
      <parameter key="content_type" value="txt"/>
      <!-- Same reasoning as above: decode the JSON as UTF-8 regardless of platform. -->
      <parameter key="encoding" value="UTF-8"/>
    </operator>
    <!-- Turns both JSON documents into one example set. -->
    <operator activated="true" class="text:json_to_data" compatibility="9.3.001" expanded="true" height="103" name="JSON To Data" width="90" x="179" y="34">
      <parameter key="ignore_arrays" value="false"/>
      <parameter key="limit_attributes" value="false"/>
      <parameter key="skip_invalid_documents" value="false"/>
      <parameter key="guess_data_types" value="true"/>
      <parameter key="keep_missing_attributes" value="false"/>
      <parameter key="missing_values_aliases" value=", null, NaN, missing"/>
    </operator>
    <!-- Converts the example set back into documents; no attribute selection or weights. -->
    <operator activated="true" class="text:data_to_documents" compatibility="9.3.001" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="34">
      <parameter key="select_attributes_and_weights" value="false"/>
      <list key="specify_weights"/>
    </operator>
    <connect from_op="Read deutsch QS" from_port="output" to_op="JSON To Data" to_port="documents 1"/>
    <connect from_op="Read franz QS" from_port="output" to_op="JSON To Data" to_port="documents 2"/>
    <connect from_op="JSON To Data" from_port="example set" to_op="Data to Documents" to_port="example set"/>
    <connect from_op="Data to Documents" from_port="documents" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
  </process>
</operator>
<!-- Per-document preprocessing: lower-cases each document, tokenizes it and removes
     German stop words, then returns it on "output 1".
     The German stemmer that used to follow the stop-word filter has been removed:
     the dictionary is loaded without stemming and matched on WORD_LOWER, so stemmed
     tokens would frequently fail to match their dictionary entry. -->
<operator activated="true" class="loop_collection" compatibility="9.6.000" expanded="true" height="82" name="Loop Collection" width="90" x="179" y="85">
  <parameter key="set_iteration_macro" value="false"/>
  <parameter key="macro_name" value="iteration"/>
  <parameter key="macro_start_value" value="1"/>
  <parameter key="unfold" value="false"/>
  <process expanded="true">
    <!-- Lower-casing matches the lower-case dictionary key (WORD_LOWER). -->
    <operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases" width="90" x="112" y="34">
      <parameter key="transform_to" value="lower case"/>
    </operator>
    <!-- Tokenizes in "non letters" mode. The characters, language and
         max_token_length values presumably belong to the other modes - confirm. -->
    <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize" width="90" x="246" y="34">
      <parameter key="mode" value="non letters"/>
      <parameter key="characters" value=".:"/>
      <parameter key="language" value="English"/>
      <parameter key="max_token_length" value="3"/>
    </operator>
    <operator activated="true" class="text:filter_stopwords_german" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (German)" width="90" x="380" y="34">
      <parameter key="stop_word_list" value="Standard"/>
    </operator>
    <connect from_port="single" to_op="Transform Cases" to_port="document"/>
    <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (German)" to_port="document"/>
    <!-- The filtered tokens go straight to the loop output so they stay unstemmed. -->
    <connect from_op="Filter Stopwords (German)" from_port="document" to_port="output 1"/>
    <portSpacing port="source_single" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
</operator>
<!-- Model application step for documents; no application parameters are set. -->
<operator name="Apply Model (Documents)" class="operator_toolbox:apply_model_documents" activated="true" compatibility="2.5.000-SNAPSHOT" expanded="true" height="103" width="90" x="514" y="238">
  <list key="application_parameters"/>
</operator>
<!-- Model branch: dictionary example set, then sentiment learner, then the model
     input of Apply Model (Documents). -->
<connect from_op="Subprocess" from_port="out 1" to_op="Dictionary-Based Sentiment (Documents)" to_port="exa"/>
<connect from_op="Dictionary-Based Sentiment (Documents)" from_port="mod" to_op="Apply Model (Documents)" to_port="mod"/>

<!-- Document branch: loaded documents, then per-document preprocessing, then the
     document input of Apply Model (Documents). -->
<connect from_op="Subprocess (2)" from_port="out 1" to_op="Loop Collection" to_port="collection"/>
<connect from_op="Loop Collection" from_port="output 1" to_op="Apply Model (Documents)" to_port="doc"/>

<!-- Results: example set on result 1, model on result 2, documents on result 3. -->
<connect from_op="Apply Model (Documents)" from_port="exa" to_port="result 1"/>
<connect from_op="Apply Model (Documents)" from_port="doc" to_port="result 3"/>
<connect from_op="Apply Model (Documents)" from_port="mod" to_port="result 2"/>

<!-- Layout-only port spacing for the top-level process. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>