Altair RISE
A program to recognize and reward our most engaged community members
Nominate Yourself Now!
Home
Discussions
Community Q&A
Text Mining
NoorMohammad786
Hello everyone, I am working on my master's thesis and have collected tweets from Twitter. Now I want to mine the text I have gathered and afterwards run sentiment analysis on it. Can RapidMiner do this? One more thing: can I import Excel data and work with it?
Find more posts tagged with
AI Studio
Accepted answers
All comments
btibert
Sentiment analysis is 100% possible in RM. There is an operator for this task, and depending on your research, I might recommend labeling your dataset and fitting a model to predict sentiment. Some dictionary-based approaches are simple token lookups, whereas a model trained on your own labeled data might help you learn the nuances of your domain. Good luck.
kdafoe
Hi NoorMohammad786. Yes. I do this all the time in RapidMiner. Here is an XML process of one way to get you started. If you want to bring in Excel text data rather than Twitter data, just replace the
Search Twitter
operator with the
Retrieve
operator and modify the
Select Attributes
operator accordingly. You will also need the Operator Toolbox extension from the Marketplace for the
Extract Sentiment
operator.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: Twitter search, text preparation, sentiment extraction.

  Flow (see the <connect> elements at the end of the outer <process>):
    Search Twitter, then Select Attributes, then Nominal to Text,
    then Process Documents from Data, then Extract Sentiment.
  Result port 1 receives the word list from Process Documents from Data;
  result port 2 receives the output of Extract Sentiment.

  The operators with a "social_media:", "text:" or "operator_toolbox:" class
  prefix presumably come from extensions that must be installed; confirm
  against your RapidMiner installation.
-->
<process version="9.10.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Source: Twitter search for the query "Rapidminer", limited to 100 results.
           The connection entry points at a local repository path; adjust it to
           your own connection. -->
      <operator activated="true" class="social_media:search_twitter" compatibility="9.6.000" expanded="true" height="82" name="Search Twitter" width="90" x="112" y="34">
        <parameter key="connection_source" value="repository"/>
        <parameter key="connection_entry" value="//Local Repository/Connections/TwitterNew"/>
        <parameter key="query" value="Rapidminer"/>
        <parameter key="result_type" value="recent or popular"/>
        <parameter key="limit" value="100"/>
        <parameter key="filter_by_geo_location" value="false"/>
        <parameter key="radius_unit" value="miles"/>
      </operator>
      <!-- Keep only the single attribute named "Text". -->
      <operator activated="true" class="select_attributes" compatibility="9.10.001" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Convert the "Text" attribute from nominal to text type. -->
      <operator activated="true" class="nominal_to_text" compatibility="9.10.001" expanded="true" height="82" name="Nominal to Text" width="90" x="380" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Build a TF-IDF word vector with no pruning; keep_text is enabled so the
           text remains available downstream. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="514" y="34">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="none"/>
        <parameter key="prune_below_percent" value="3.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <!-- Per-document subprocess: tokenize, then lower-case. -->
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="9.4.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="112" y="85">
            <parameter key="mode" value="specify characters"/>
            <!-- The double quote in the character list must be written as &quot;
                 because the attribute value itself is delimited by double quotes;
                 a raw quote here makes the document ill-formed. -->
            <parameter key="characters" value=".,!?[{:;&quot;(/"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="9.4.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="85">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Sentiment extraction with the "vader" model on the attribute named "text".
           NOTE(review): the attribute name here is lower-case while the upstream
           operators select "Text"; presumably the kept text is emitted under the
           lower-case name. Confirm against the actual example set. -->
      <operator activated="true" class="operator_toolbox:extract_sentiment" compatibility="2.12.000" expanded="true" height="103" name="Extract Sentiment" width="90" x="648" y="136">
        <parameter key="model" value="vader"/>
        <parameter key="text_attribute" value="text"/>
        <parameter key="show_advanced_output" value="true"/>
        <parameter key="use_default_tokenization_regex" value="true"/>
        <list key="additional_words"/>
      </operator>
      <!-- Wiring of the main pipeline and the two result ports. -->
      <connect from_op="Search Twitter" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Extract Sentiment" to_port="exa"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_port="result 1"/>
      <connect from_op="Extract Sentiment" from_port="exa" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Quick Links
All Categories
Recent Discussions
Activity
Unanswered
日本語 (Japanese)
한국어(Korean)
Groups