Instagram search enrich data with web service
Robi_Me
New Altair Community Member
Hi
I had this process working yesterday, but for some reason it is not working today. I am attempting to extract comments from Instagram for sentiment analysis, and I need to search at the account level as well as the keyword level.
I am not great at JSON paths; have I written these correctly? When I paste the URLs into a web browser, I get a JSON response.
I had this process working yesterday, but for some reason it is not working today. I am attempting to extract comments from Instagram for sentiment analysis, and I need to search at the account level as well as the keyword level.
I am not great at JSON paths; have I written these correctly? When I paste the URLs into a web browser, I get a JSON response.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: Instagram look-ups through "Enrich Data by Webservice".

  Two parallel chains share the same shape:
    Create ExampleSet (CSV rows of term,search)
    then Loop Examples (one iteration per row, index in %{example})
    then Extract Macro (copies the row's "search" value into %{search})
    then Enrich Data by Webservice (GET %{search}, JsonPath queries)
    then Delay.

  Chain at y=187 (active): account profile URLs, reads $.graphql.user.* fields.
  Chain at y=391 (deactivated): hashtag URLs under /explore/tags/, reads
  $.graphql.hashtag.* fields.
-->
<process version="9.7.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="187">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- Row breaks must be written as &#10;: a literal newline inside an
             attribute value is normalised to a space by the XML parser, which
             would merge the header and all rows into a single CSV line. -->
        <parameter key="input_csv_text" value="term,search&#10;saradioawards,https://www.instagram.com/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/pfizerinc/?__a=1&#10;washington_post,https://www.instagram.com/coveringpotus/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples" width="90" x="313" y="187">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="313" y="34">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries">
              <parameter key="bio" value="$.graphql.user.biography"/>
              <parameter key="full_name" value="$.graphql.user.full_name"/>
              <parameter key="user_id" value="$.graphql.user.id"/>
              <parameter key="is_business_account" value="$.graphql.user.is_business_account"/>
              <parameter key="category_name" value="$.graphql.user.business_category_name"/>
              <parameter key="category_enum" value="$.graphql.user.category_enum"/>
              <parameter key="category_user" value="$.graphql.user.category_name"/>
              <parameter key="connected_fb_page" value="$.graphql.user.connected_fb_page"/>
              <parameter key="edges" value="$.graphql.user.edge_owner_to_timeline_media.edges"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="true"/>
          </operator>
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay" width="90" x="447" y="34">
            <parameter key="delay" value="fixed"/>
            <parameter key="delay_amount" value="6000"/>
            <parameter key="min_delay_amount" value="0"/>
            <parameter key="max_delay_amount" value="1000"/>
            <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
          </operator>
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Enrich Data by Webservice" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice" from_port="ExampleSet" to_op="Delay" to_port="through 1"/>
          <connect from_op="Delay" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="391">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- The hashtag path segment must not contain a space (invalid in a URL
             and in a hashtag), so the tag is written as "washingtonpost" while
             the "term" label keeps its original spelling. -->
        <parameter key="input_csv_text" value="term,search&#10;saradioawards,https://www.instagram.com/explore/tags/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/explore/tags/pfizer/?__a=1&#10;washington_post,https://www.instagram.com/explore/tags/washingtonpost/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="false" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples (2)" width="90" x="313" y="391">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice (2)" width="90" x="313" y="34">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries">
              <parameter key="search_id" value="$.graphql.hashtag.id"/>
              <parameter key="search" value="$.graphql.hashtag.name"/>
              <parameter key="posts" value="$.graphql.hashtag.edge_hashtag_to_media.count"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="true"/>
          </operator>
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay (2)" width="90" x="447" y="34">
            <parameter key="delay" value="random"/>
            <parameter key="delay_amount" value="1000"/>
            <parameter key="min_delay_amount" value="5000"/>
            <parameter key="max_delay_amount" value="9000"/>
            <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
          </operator>
          <connect from_port="example set" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Enrich Data by Webservice (2)" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice (2)" from_port="ExampleSet" to_op="Delay (2)" to_port="through 1"/>
          <connect from_op="Delay (2)" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <!-- Canvas annotations: the box at y=151 frames the account-profile chain
           and the box at y=348 frames the hashtag chain, so the labels follow
           the URLs each chain actually requests. -->
      <description align="center" color="yellow" colored="false" height="169" resized="true" width="332" x="139" y="151">Account Search</description>
      <description align="center" color="yellow" colored="false" height="171" resized="true" width="328" x="143" y="348">Keyword Search</description>
    </process>
  </operator>
</process>
Tagged:
0
Best Answer
-
Clear them cookies!
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: Instagram look-ups through "Enrich Data by Webservice",
  with a "Clear Cookies" operator (scope: global) placed before every request.

  Two parallel chains, both active, share the same shape:
    Create ExampleSet (CSV rows of term,search)
    then Loop Examples (one iteration per row, index in %{example})
    then Extract Macro (copies the row's "search" value into %{search})
    then Clear Cookies
    then Enrich Data by Webservice (GET %{search}, JsonPath queries)
    then Delay (random wait between the configured min and max).

  Chain at y=187: account profile URLs, reads $.graphql.user.* fields,
  delivered to "result 1".
  Chain at y=391: hashtag URLs under /explore/tags/, reads
  $.graphql.hashtag.* fields, delivered to "result 2".
-->
<process version="9.7.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="187">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- Row breaks must be written as &#10;: a literal newline inside an
             attribute value is normalised to a space by the XML parser, which
             would merge the header and all rows into a single CSV line. -->
        <parameter key="input_csv_text" value="term,search&#10;saradioawards,https://www.instagram.com/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/pfizerinc/?__a=1&#10;washington_post,https://www.instagram.com/coveringpotus/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples" width="90" x="313" y="187">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <operator activated="true" class="web:clear_cookies" compatibility="9.3.001" expanded="true" height="82" name="Clear Cookies" width="90" x="380" y="85">
            <parameter key="cookie_scope" value="global"/>
          </operator>
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="514" y="85">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries">
              <parameter key="bio" value="$.graphql.user.biography"/>
              <parameter key="full_name" value="$.graphql.user.full_name"/>
              <parameter key="user_id" value="$.graphql.user.id"/>
              <parameter key="is_business_account" value="$.graphql.user.is_business_account"/>
              <parameter key="category_name" value="$.graphql.user.business_category_name"/>
              <parameter key="category_enum" value="$.graphql.user.category_enum"/>
              <parameter key="category_user" value="$.graphql.user.category_name"/>
              <parameter key="connected_fb_page" value="$.graphql.user.connected_fb_page"/>
              <parameter key="edges" value="$.graphql.user.edge_owner_to_timeline_media.edges"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="false"/>
          </operator>
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay" width="90" x="648" y="85">
            <parameter key="delay" value="random"/>
            <parameter key="delay_amount" value="1000"/>
            <parameter key="min_delay_amount" value="10000"/>
            <parameter key="max_delay_amount" value="12000"/>
          </operator>
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Clear Cookies" to_port="through 1"/>
          <connect from_op="Clear Cookies" from_port="through 1" to_op="Enrich Data by Webservice" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice" from_port="ExampleSet" to_op="Delay" to_port="through 1"/>
          <connect from_op="Delay" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="391">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- The hashtag path segment must not contain a space (invalid in a URL
             and in a hashtag), so the tag is written as "washingtonpost" while
             the "term" label keeps its original spelling. -->
        <parameter key="input_csv_text" value="term,search&#10;saradioawards,https://www.instagram.com/explore/tags/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/explore/tags/pfizer/?__a=1&#10;washington_post,https://www.instagram.com/explore/tags/washingtonpost/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples (2)" width="90" x="313" y="391">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <operator activated="true" class="web:clear_cookies" compatibility="9.3.001" expanded="true" height="82" name="Clear Cookies (2)" width="90" x="313" y="34">
            <parameter key="cookie_scope" value="global"/>
          </operator>
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice (2)" width="90" x="447" y="34">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries">
              <parameter key="search_id" value="$.graphql.hashtag.id"/>
              <parameter key="search" value="$.graphql.hashtag.name"/>
              <parameter key="posts" value="$.graphql.hashtag.edge_hashtag_to_media.count"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="true"/>
          </operator>
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay (2)" width="90" x="581" y="34">
            <parameter key="delay" value="random"/>
            <parameter key="delay_amount" value="1000"/>
            <parameter key="min_delay_amount" value="5000"/>
            <parameter key="max_delay_amount" value="9000"/>
            <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
          </operator>
          <connect from_port="example set" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Clear Cookies (2)" to_port="through 1"/>
          <connect from_op="Clear Cookies (2)" from_port="through 1" to_op="Enrich Data by Webservice (2)" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice (2)" from_port="ExampleSet" to_op="Delay (2)" to_port="through 1"/>
          <connect from_op="Delay (2)" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
      <connect from_op="Create ExampleSet (2)" from_port="output" to_op="Loop Examples (2)" to_port="example set"/>
      <connect from_op="Loop Examples (2)" from_port="output 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <!-- Canvas annotations: the box at y=151 frames the account-profile chain
           and the box at y=348 frames the hashtag chain, so the labels follow
           the URLs each chain actually requests. -->
      <description align="center" color="yellow" colored="false" height="169" resized="true" width="332" x="139" y="151">Account Search</description>
      <description align="center" color="yellow" colored="false" height="171" resized="true" width="328" x="143" y="348">Keyword Search</description>
    </process>
  </operator>
</process>
0
Answers
-
Clear them cookies!
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: Instagram look-ups through "Enrich Data by Webservice",
  with a "Clear Cookies" operator (scope: global) placed before every request.

  Two parallel chains, both active, share the same shape:
    Create ExampleSet (CSV rows of term,search)
    then Loop Examples (one iteration per row, index in %{example})
    then Extract Macro (copies the row's "search" value into %{search})
    then Clear Cookies
    then Enrich Data by Webservice (GET %{search}, JsonPath queries)
    then Delay (random wait between the configured min and max).

  Chain at y=187: account profile URLs, reads $.graphql.user.* fields,
  delivered to "result 1".
  Chain at y=391: hashtag URLs under /explore/tags/, reads
  $.graphql.hashtag.* fields, delivered to "result 2".
-->
<process version="9.7.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="187">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- Row breaks must be written as &#10;: a literal newline inside an
             attribute value is normalised to a space by the XML parser, which
             would merge the header and all rows into a single CSV line. -->
        <parameter key="input_csv_text" value="term,search&#10;saradioawards,https://www.instagram.com/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/pfizerinc/?__a=1&#10;washington_post,https://www.instagram.com/coveringpotus/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples" width="90" x="313" y="187">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <operator activated="true" class="web:clear_cookies" compatibility="9.3.001" expanded="true" height="82" name="Clear Cookies" width="90" x="380" y="85">
            <parameter key="cookie_scope" value="global"/>
          </operator>
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="514" y="85">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries">
              <parameter key="bio" value="$.graphql.user.biography"/>
              <parameter key="full_name" value="$.graphql.user.full_name"/>
              <parameter key="user_id" value="$.graphql.user.id"/>
              <parameter key="is_business_account" value="$.graphql.user.is_business_account"/>
              <parameter key="category_name" value="$.graphql.user.business_category_name"/>
              <parameter key="category_enum" value="$.graphql.user.category_enum"/>
              <parameter key="category_user" value="$.graphql.user.category_name"/>
              <parameter key="connected_fb_page" value="$.graphql.user.connected_fb_page"/>
              <parameter key="edges" value="$.graphql.user.edge_owner_to_timeline_media.edges"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="false"/>
          </operator>
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay" width="90" x="648" y="85">
            <parameter key="delay" value="random"/>
            <parameter key="delay_amount" value="1000"/>
            <parameter key="min_delay_amount" value="10000"/>
            <parameter key="max_delay_amount" value="12000"/>
          </operator>
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Clear Cookies" to_port="through 1"/>
          <connect from_op="Clear Cookies" from_port="through 1" to_op="Enrich Data by Webservice" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice" from_port="ExampleSet" to_op="Delay" to_port="through 1"/>
          <connect from_op="Delay" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="391">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!-- The hashtag path segment must not contain a space (invalid in a URL
             and in a hashtag), so the tag is written as "washingtonpost" while
             the "term" label keeps its original spelling. -->
        <parameter key="input_csv_text" value="term,search&#10;saradioawards,https://www.instagram.com/explore/tags/saradioawards/?__a=1&#10;pfizer,https://www.instagram.com/explore/tags/pfizer/?__a=1&#10;washington_post,https://www.instagram.com/explore/tags/washingtonpost/?__a=1"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="9.7.002" expanded="true" height="103" name="Loop Examples (2)" width="90" x="313" y="391">
        <parameter key="iteration_macro" value="example"/>
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="9.7.002" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="34">
            <parameter key="macro" value="search"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="search"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros">
              <parameter key="term" value="term"/>
            </list>
          </operator>
          <operator activated="true" class="web:clear_cookies" compatibility="9.3.001" expanded="true" height="82" name="Clear Cookies (2)" width="90" x="313" y="34">
            <parameter key="cookie_scope" value="global"/>
          </operator>
          <operator activated="true" class="web:enrich_data_by_webservice" compatibility="9.0.000" expanded="true" height="68" name="Enrich Data by Webservice (2)" width="90" x="447" y="34">
            <parameter key="query_type" value="JsonPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Nominal"/>
            <list key="regular_expression_queries">
              <parameter key="foo" value=".*"/>
            </list>
            <list key="regular_region_queries"/>
            <list key="xpath_queries"/>
            <list key="namespaces"/>
            <parameter key="ignore_CDATA" value="true"/>
            <parameter key="assume_html" value="true"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries">
              <parameter key="search_id" value="$.graphql.hashtag.id"/>
              <parameter key="search" value="$.graphql.hashtag.name"/>
              <parameter key="posts" value="$.graphql.hashtag.edge_hashtag_to_media.count"/>
            </list>
            <parameter key="request_method" value="GET"/>
            <parameter key="url" value="%{search}"/>
            <parameter key="delay" value="1000"/>
            <list key="request_properties"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="keep_sensitive_headers" value="true"/>
          </operator>
          <operator activated="true" class="delay" compatibility="9.7.002" expanded="true" height="82" name="Delay (2)" width="90" x="581" y="34">
            <parameter key="delay" value="random"/>
            <parameter key="delay_amount" value="1000"/>
            <parameter key="min_delay_amount" value="5000"/>
            <parameter key="max_delay_amount" value="9000"/>
            <description align="center" color="transparent" colored="false" width="126">Because Social networks can be sticky about scraping</description>
          </operator>
          <connect from_port="example set" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Clear Cookies (2)" to_port="through 1"/>
          <connect from_op="Clear Cookies (2)" from_port="through 1" to_op="Enrich Data by Webservice (2)" to_port="Example Set"/>
          <connect from_op="Enrich Data by Webservice (2)" from_port="ExampleSet" to_op="Delay (2)" to_port="through 1"/>
          <connect from_op="Delay (2)" from_port="through 1" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
      <connect from_op="Create ExampleSet (2)" from_port="output" to_op="Loop Examples (2)" to_port="example set"/>
      <connect from_op="Loop Examples (2)" from_port="output 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <!-- Canvas annotations: the box at y=151 frames the account-profile chain
           and the box at y=348 frames the hashtag chain, so the labels follow
           the URLs each chain actually requests. -->
      <description align="center" color="yellow" colored="false" height="169" resized="true" width="332" x="139" y="151">Account Search</description>
      <description align="center" color="yellow" colored="false" height="171" resized="true" width="328" x="143" y="348">Keyword Search</description>
    </process>
  </operator>
</process>
0