Hey everyone,
I am having some trouble with the "Search Twitter" operator and the "since id" parameter. I was hoping to use the same query multiple times while requesting only newer tweets that have not been downloaded yet.
What I am trying to do is
- get tweets for a certain query
- then store those in my repository
- search again (later) with the same query, only this time getting newer tweets (with an ID greater than the last ID of the first search), to avoid hitting the rate limit
- finally combine those example sets
Not sure if I am missing something here...
Thanks for your help!
Here is a sample process:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process (version 9.6.000): incremental Twitter search.

  Stage 1: search Twitter for "apples", extract the maximum value of the "Id"
           attribute into the macro max_id, and store the result in the repository.
  Stage 2: search again with the same query, passing %{max_id} as since_id.
  Stage 3: retrieve the stored first result and append the second result to it.

  Results delivered: result 1 = first search, result 2 = second search,
  result 3 = combined example set.
-->
<process version="9.6.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Stage 1a: initial search for the query "apples", recent tweets, limited to 500. -->
      <operator activated="true" class="social_media:search_twitter" compatibility="9.6.000" expanded="true" height="82" name="First Search" width="90" x="45" y="136">
        <parameter key="connection_source" value="repository"/>
        <parameter key="connection_entry" value="//Local Repository/Connections/Twitter"/>
        <parameter key="query" value="apples"/>
        <parameter key="result_type" value="recent"/>
        <parameter key="limit" value="500"/>
        <parameter key="locale" value="en"/>
        <parameter key="filter_by_geo_location" value="false"/>
        <parameter key="radius_unit" value="miles"/>
      </operator>
      <!-- Stage 1b: put the "max" statistic of the attribute "Id" into the macro max_id. -->
      <!-- NOTE(review): presumably the statistic is rendered as a floating-point number;
           tweet IDs may exceed the range a double represents exactly, so max_id could be
           rounded or formatted in a way since_id does not accept. TODO confirm by logging
           the macro value and comparing it with the largest Id in the example set. -->
      <operator activated="true" class="extract_macro" compatibility="9.6.000" expanded="true" height="68" name="Get last ID" width="90" x="179" y="136">
        <parameter key="macro" value="max_id"/>
        <parameter key="macro_type" value="statistics"/>
        <parameter key="statistics" value="max"/>
        <parameter key="attribute_name" value="Id"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Stage 1c: persist the first result so that it can be reloaded in stage 3. -->
      <operator activated="true" class="store" compatibility="9.6.000" expanded="true" height="68" name="First example set" width="90" x="313" y="136">
        <parameter key="repository_entry" value="../data/apples"/>
      </operator>
      <!-- Stage 3a: reload the stored first result (same repository entry as the Store above). -->
      <operator activated="true" class="retrieve" compatibility="9.6.000" expanded="true" height="68" name="Retrieve" width="90" x="45" y="493">
        <parameter key="repository_entry" value="../data/apples"/>
      </operator>
      <!-- Stage 2a: same query again, limited to 1000, with since_id taken from the macro max_id. -->
      <!-- NOTE(review): the reported symptom is that this search also returns tweets from
           before max_id; see the note on "Get last ID" for the suspected cause. -->
      <operator activated="true" class="social_media:search_twitter" compatibility="9.6.000" expanded="true" height="82" name="Second Search" width="90" x="45" y="340">
        <parameter key="connection_source" value="repository"/>
        <parameter key="connection_entry" value="//Local Repository/Connections/Twitter"/>
        <parameter key="query" value="apples"/>
        <parameter key="result_type" value="recent"/>
        <parameter key="limit" value="1000"/>
        <parameter key="since_id" value="%{max_id}"/>
        <parameter key="locale" value="en"/>
        <parameter key="filter_by_geo_location" value="false"/>
        <parameter key="radius_unit" value="miles"/>
      </operator>
      <!-- Stage 2b: duplicate the second result: one copy goes to result 2, one to the Append. -->
      <operator activated="true" class="multiply" compatibility="9.6.000" expanded="true" height="103" name="Second example set" width="90" x="179" y="340"/>
      <!-- Stage 3b: append the second search result (example set 1) and the retrieved first result (example set 2). -->
      <operator activated="true" class="append" compatibility="9.6.000" expanded="true" height="103" name="Combined example set" width="90" x="313" y="493">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <!-- Wiring, stage 1: First Search to Get last ID to Store to result 1. -->
      <connect from_op="First Search" from_port="output" to_op="Get last ID" to_port="example set"/>
      <connect from_op="Get last ID" from_port="example set" to_op="First example set" to_port="input"/>
      <connect from_op="First example set" from_port="through" to_port="result 1"/>
      <!-- Wiring, stages 2 and 3: Retrieve and the second search feed the Append; merged set to result 3. -->
      <connect from_op="Retrieve" from_port="output" to_op="Combined example set" to_port="example set 2"/>
      <connect from_op="Second Search" from_port="output" to_op="Second example set" to_port="input"/>
      <connect from_op="Second example set" from_port="output 1" to_port="result 2"/>
      <connect from_op="Second example set" from_port="output 2" to_op="Combined example set" to_port="example set 1"/>
      <connect from_op="Combined example set" from_port="merged set" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <!-- Canvas notes. Their text content is data, so these elements are kept on one line each;
           line breaks inside them use the self-closing form so the document stays well-formed. -->
      <description align="left" color="green" colored="true" height="196" resized="true" width="398" x="32" y="44">- Search for a string like &quot;apples&quot;<br/>- Limit results to 500<br/>- Store the example set to the local repository<br/></description>
      <description align="left" color="red" colored="true" height="174" resized="true" width="400" x="33" y="267">- Search again with same query string, e.g. &quot;apples&quot;<br/>- Use the last ID from previous search<br/> - Limit results to 1000</description>
      <description align="left" color="red" colored="true" height="167" resized="true" width="402" x="30" y="450">- Load (old) tweets from the repository<br/> - Combine with new search results</description>
      <description align="left" color="yellow" colored="false" height="126" resized="false" width="180" x="904" y="138">- The first search returns as expected: 500 tweets<br/> - The second search has more results than expected, incl. results BEFORE the last ID</description>
    </process>
  </operator>
</process>