🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Get Values on the right position, using parent id

User: "m_moertl"
New Altair Community Member
Updated by Jocelyn

Hey there again :)!

 

First of all, I want to thank everybody who helped me out with my sorting problem! Now I have a new challenge and I'm failing pretty hard.

 

I have to do some sentiment analysis for my bachelor thesis. I built up a huge database with lots of information. I'm using Facepager to get data from Facebook. With your help I managed to get the data into the perfect position to analyse it. But now I need the "likes" for each post/comment/recomment in the same row as the post/comment/recomment.

 

I attached one of my test files, so you can see what I mean!

 

As you can see, most of the likes are just 1 row down (you can figure that out with ID-parentID), so it wouldn't be a problem to move them up. But, for example, for the first row with ID=2 (that's the main post), the Summary.total_count is at the very end of the example set (row 504). There you can also see that it has the right parentId.

 

The set is always split up into 4 LVL types, but they differ in between. I'll try to explain.

LVL1 is always a post, LVL2 can be a comment or a like count for LVL1, LVL3 can be a comment or a like count for LVL2, and LVL4 is always the like count for LVL3 comments. Like counts are always OBJECTTYPE=offcut, whilst comments or posts are always OBJECTTYPE=data.

 

As I said, my database is huge and I can't sort it by hand, and I'm failing to figure out how to build a proper process to tackle this problem. I hope somebody can help me out here; I would really appreciate that.

 

Kind Regards

Mike

Find more posts tagged with

Sort by:
1 - 2 of 21

    Hi,

     

    I think what you want to do is a join from the top down.

     

    Take all 4th level,

    join on parent_id = id with 3rd level

     

    Take all 3rd+4th level

    join on parent_id = id with 2nd level

     

    and

    Take all of this and join it on the first level data.

     

    The result is a line where you have for each 4th level post all the information from the upper level.

     

    Does it make sense? Otherwise I would need to find some time during the week to create it.

     

    Cheers,

    Martin

    User: "m_moertl"
    New Altair Community Member
    OP

    Hey Martin,

     

    Thanks for your fast response and help! I used the Lag Series in the end but had to build a big subprocess, which is inspired by one I found on the net. I've added the XML!

     

    Thanks again!! You helped me out a lot!

    Kind regards

    Mike

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.0.001">
      <!--
        RapidMiner process with three top-level steps:
          1. "Retrieve 100TestSentiment": loads the example set from the repository.
          2. "Filter/Sorting": removes unusable rows and copies the like count that is
             stored in the following row onto the row it belongs to.
          3. "Sentiment Analysis": runs the Aylien sentiment and taxonomy operators
             on the "message" attribute.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Step 1: load the source data. -->
          <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve 100TestSentiment" width="90" x="45" y="34">
            <parameter key="repository_entry" value="../data/Test/100TestSentiment"/>
          </operator>
          <!-- Step 2: clean the rows and attach each like count to its parent row. -->
          <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Filter/Sorting" width="90" x="179" y="34">
            <process expanded="true">
              <!-- invert_filter=true: drop the rows whose objecttype equals "empty". -->
              <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Empty" width="90" x="45" y="34">
                <parameter key="invert_filter" value="true"/>
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="objecttype.equals.empty"/>
                </list>
              </operator>
              <!-- invert_filter=true: drop the rows whose querystatus equals "error (400)". -->
              <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Error" width="90" x="179" y="34">
                <parameter key="invert_filter" value="true"/>
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="querystatus.equals.error (400)"/>
                </list>
              </operator>
              <!--
                Two running ids are generated, one with offset=1 (id1) and one with the
                default offset (id2), so the two numberings are shifted by one against
                each other. Presumably id1 of a row equals id2 of the row directly after
                it; verify the start values against the Generate ID documentation.
                Set Role is used without a target_role, so the operator default applies.
                NOTE(review): presumably this turns the generated id back into a regular
                attribute so it can be renamed and a second id can be generated; confirm.
              -->
              <operator activated="true" class="generate_id" compatibility="8.0.001" expanded="true" height="82" name="Generate ID" width="90" x="45" y="136">
                <parameter key="offset" value="1"/>
              </operator>
              <operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role" width="90" x="45" y="226">
                <parameter key="attribute_name" value="id"/>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="45" y="316">
                <parameter key="old_name" value="id"/>
                <parameter key="new_name" value="id1"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <operator activated="true" class="generate_id" compatibility="8.0.001" expanded="true" height="82" name="Generate ID (2)" width="90" x="179" y="136"/>
              <operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role (2)" width="90" x="179" y="226">
                <parameter key="attribute_name" value="id"/>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename (2)" width="90" x="179" y="316">
                <parameter key="old_name" value="id"/>
                <parameter key="new_name" value="id2"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <!-- Duplicate the set so that it can be joined with itself. -->
              <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="313" y="34"/>
              <!--
                Left self-join on id1 (left input) = id2 (right input). Because the two
                ids are shifted by one, each row is paired with a neighbouring row.
                remove_double_attributes=false keeps the right-hand copies of all
                attributes; "Rename (4)" below expects them with the suffix "_from_ES2".
                NOTE(review): this only finds a like count stored in the row directly next
                to its parent. A like count placed elsewhere in the set is not matched;
                a join on the parent id would be needed for those rows.
              -->
              <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="313" y="191">
                <parameter key="remove_double_attributes" value="false"/>
                <parameter key="join_type" value="left"/>
                <parameter key="use_id_attribute_as_key" value="false"/>
                <list key="key_attributes">
                  <parameter key="id1" value="id2"/>
                </list>
              </operator>
              <!-- The like count taken from the joined row becomes the attribute "likes". -->
              <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename (4)" width="90" x="447" y="34">
                <parameter key="old_name" value="summary.total_count_from_ES2"/>
                <parameter key="new_name" value="likes"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <!-- invert_filter=true: drop the rows whose objecttype equals "offcut". -->
              <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="136">
                <parameter key="invert_filter" value="true"/>
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="objecttype.equals.offcut"/>
                </list>
              </operator>
              <!--
                "likes" is part of the subset because "Rename (4)" creates it; without it
                the joined like count would be removed again before leaving the subprocess.
                NOTE(review): "total_like_count" is kept from the original list, but no
                operator in this process creates an attribute of that name; confirm that
                it exists in the source data or remove it.
              -->
              <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="581" y="38">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="likes|total_like_count|querytype|message|level|from.name|created_time"/>
              </operator>
              <connect from_port="in 1" to_op="Filter Empty" to_port="example set input"/>
              <connect from_op="Filter Empty" from_port="example set output" to_op="Filter Error" to_port="example set input"/>
              <connect from_op="Filter Error" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
              <connect from_op="Generate ID" from_port="example set output" to_op="Set Role" to_port="example set input"/>
              <connect from_op="Set Role" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_op="Generate ID (2)" to_port="example set input"/>
              <connect from_op="Generate ID (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
              <connect from_op="Set Role (2)" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
              <connect from_op="Rename (2)" from_port="example set output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Join" to_port="left"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Rename (4)" to_port="example set input"/>
              <connect from_op="Rename (4)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
              <connect from_op="Select Attributes (3)" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Step 3: sentiment and taxonomy classification via the Aylien extension. -->
          <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Sentiment Analysis" width="90" x="313" y="34">
            <process expanded="true">
              <!-- Sentiment is computed on the "message" attribute of each row. -->
              <operator activated="true" class="com.aylien.textapi.rapidminer:aylien_sentiment" compatibility="0.2.000" expanded="true" height="68" name="Analyze Sentiment" width="90" x="45" y="34">
                <parameter key="connection" value="Aylien Text Analysis Connection"/>
                <parameter key="input_attribute" value="message"/>
              </operator>
              <!-- The taxonomy operator works on documents, so the rows are converted first. -->
              <operator activated="true" class="text:data_to_documents" compatibility="7.5.000" expanded="true" height="68" name="Data to Documents" width="90" x="179" y="34">
                <list key="specify_weights"/>
              </operator>
              <operator activated="true" class="com.aylien.textapi.rapidminer:aylien_document_classify_by_taxonomy" compatibility="0.2.000" expanded="true" height="82" name="Categorize (Document)" width="90" x="313" y="34">
                <parameter key="connection" value="Aylien Text Analysis Connection"/>
              </operator>
              <!-- Convert the classified documents back into an example set. -->
              <operator activated="true" class="text:documents_to_data" compatibility="7.5.000" expanded="true" height="82" name="Documents to Data" width="90" x="447" y="34">
                <parameter key="text_attribute" value="text"/>
                <parameter key="label_attribute" value="1"/>
              </operator>
              <connect from_port="in 1" to_op="Analyze Sentiment" to_port="Example Set"/>
              <connect from_op="Analyze Sentiment" from_port="Example Set" to_op="Data to Documents" to_port="example set"/>
              <connect from_op="Data to Documents" from_port="documents" to_op="Categorize (Document)" to_port="documents 1"/>
              <connect from_op="Categorize (Document)" from_port="documents" to_op="Documents to Data" to_port="documents 1"/>
              <connect from_op="Documents to Data" from_port="example set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Retrieve 100TestSentiment" from_port="output" to_op="Filter/Sorting" to_port="in 1"/>
          <connect from_op="Filter/Sorting" from_port="out 1" to_op="Sentiment Analysis" to_port="in 1"/>
          <connect from_op="Sentiment Analysis" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>