wrapping GetPage in HandleException - cannot open connection
cindyharper
New Altair Community Member
I'm getting this error for every call of Get Page in a Handle Exception wrapper inside a Loop Examples loop.
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:05 AM INFO: Saving results.
Here's my process:
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:05 AM INFO: Saving results.
Here's my process:
Any suggestions? How do I delay between GETs for the Loop Examples loop? Will that help?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
<parameter key="logverbosity" value="all"/>
<parameter key="logfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3log"/>
<parameter key="resultfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3result"/>
<process expanded="true" height="415" width="487">
<operator activated="true" class="retrieve" compatibility="5.2.006" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
<parameter key="repository_entry" value="GooglePagesStep2"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="45" y="210">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value="URL"/>
<parameter key="attributes" value="Content-Length|Content-Type|Date|Expires|Last-Modified|Response-Code|Response-Message|URL|token_number"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="5.2.006" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="112" y="165">
  <!-- Defines the attribute PDF from the expression contains(Href,".pdf").
       The inner double quotes are written as &quot; because a literal double
       quote would terminate the attribute value and break well-formedness. -->
  <list key="function_descriptions">
    <parameter key="PDF" value="contains(Href,&quot;.pdf&quot;)"/>
  </list>
</operator>
<operator activated="true" class="filter_examples" compatibility="5.2.006" expanded="true" height="76" name="Filter Examples" width="90" x="179" y="75"/>
<operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="30">
<process expanded="true" height="357" width="480">
<operator activated="true" class="handle_exception" compatibility="5.2.006" expanded="true" height="94" name="Handle Exception" width="90" x="45" y="30">
<parameter key="exception_macro" value="exceptionmsg"/>
<process expanded="true" height="375" width="225">
<!-- Get Page, first step inside Handle Exception.
     NOTE(review): "url" is the literal text Href, not the value of the Href
     attribute of the current example. The reply in this thread identifies
     this as the reason no connection can be established; the value has to
     come from a macro instead. -->
<operator activated="true"
          class="web:get_webpage"
          compatibility="5.1.004"
          expanded="true"
          height="60"
          name="Get Page"
          width="90"
          x="45"
          y="30">
  <parameter key="url" value="Href"/>
  <parameter key="random_user_agent" value="true"/>
  <!-- Both timeouts are set to the same value. -->
  <parameter key="connection_timeout" value="100000"/>
  <parameter key="read_timeout" value="100000"/>
  <parameter key="accept_cookies" value="all"/>
  <!-- No query parameters are configured. -->
  <list key="query_parameters"/>
</operator>
<operator activated="true" class="text:extract_token_number" compatibility="5.2.001" expanded="true" height="60" name="Extract Token Number" width="90" x="45" y="120">
<parameter key="condition" value="matches"/>
<parameter key="regular_expression" value="LIBRAR"/>
</operator>
<operator activated="true" class="text:extract_token_number" compatibility="5.2.001" expanded="true" height="60" name="Extract Token Number (2)" width="90" x="45" y="210">
<parameter key="metadata_key" value="LIBRARnumber"/>
<parameter key="condition" value="matches"/>
<parameter key="regular_expression" value="LIBRAR"/>
<parameter key="invert_condition" value="true"/>
</operator>
<operator activated="true" class="text:documents_to_data" compatibility="5.2.001" expanded="true" height="76" name="Documents to Data (2)" width="90" x="112" y="300">
<parameter key="text_attribute" value="NewsletterDoc"/>
<parameter key="label_attribute" value="Href"/>
</operator>
<connect from_port="in 1" to_port="out 1"/>
<connect from_op="Get Page" from_port="output" to_op="Extract Token Number" to_port="document"/>
<connect from_op="Extract Token Number" from_port="document" to_op="Extract Token Number (2)" to_port="document"/>
<connect from_op="Extract Token Number (2)" from_port="document" to_op="Documents to Data (2)" to_port="documents 1"/>
<connect from_op="Documents to Data (2)" from_port="example set" to_port="out 2"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="source_in 3" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
</process>
<process expanded="true" height="375" width="202">
<connect from_port="in 1" to_port="out 1"/>
<connect from_port="in 2" to_port="out 2"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="source_in 3" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
</process>
</operator>
<connect from_port="example set" to_op="Handle Exception" to_port="in 1"/>
<connect from_op="Handle Exception" from_port="out 1" to_port="example set"/>
<connect from_op="Handle Exception" from_port="out 2" to_port="output 1"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_example set" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="flatten_collection" compatibility="5.2.006" expanded="true" height="60" name="Flatten Collection" width="90" x="246" y="165"/>
<operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store (2)" width="90" x="313" y="210">
<parameter key="repository_entry" value="GooglePagesStep3Docs"/>
</operator>
<operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store" width="90" x="380" y="120">
<parameter key="repository_entry" value="GooglePagesStep3Store"/>
</operator>
<connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
<connect from_op="Loop Examples" from_port="example set" to_op="Store" to_port="input"/>
<connect from_op="Loop Examples" from_port="output 1" to_op="Flatten Collection" to_port="collection"/>
<connect from_op="Flatten Collection" from_port="flat" to_op="Store (2)" to_port="input"/>
<connect from_op="Store (2)" from_port="through" to_port="result 2"/>
<connect from_op="Store" from_port="through" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
Tagged:
0
Answers
-
For the delay you could try the Delay operator.
Does the process run without Handle Exception? Your thread title suggests that you have fewer or no problems without that operator.
Oh no, I just saw that you entered "Href" as the URL in Get Page - of course that does not work, as it is not a valid URL.
I suppose you have an Href attribute in your example set. In that case you need something like the process below. Please note the Extract Macro operator in Loop Examples, and the use of the href macro in Get Page.
Best,
Marius
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
<parameter key="logverbosity" value="all"/>
<parameter key="logfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3log"/>
<parameter key="resultfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3result"/>
<process expanded="true" height="480" width="705">
<operator activated="true" class="retrieve" compatibility="5.2.006" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
<parameter key="repository_entry" value="GooglePagesStep2"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="180" y="30">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value="URL"/>
<parameter key="attributes" value="Content-Length|Content-Type|Date|Expires|Last-Modified|Response-Code|Response-Message|URL|token_number"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="5.2.006" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="315" y="30">
  <!-- Defines the attribute PDF from the expression contains(Href,".pdf").
       The inner double quotes are written as &quot; because a literal double
       quote would terminate the attribute value and break well-formedness. -->
  <list key="function_descriptions">
    <parameter key="PDF" value="contains(Href,&quot;.pdf&quot;)"/>
  </list>
</operator>
<operator activated="true" class="filter_examples" compatibility="5.2.006" expanded="true" height="76" name="Filter Examples" width="90" x="450" y="30"/>
<operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="94" name="Loop Examples" width="90" x="112" y="210">
<process expanded="true" height="357" width="480">
<!-- Extract Macro: fills the macro "href" with a data value taken from the
     attribute Href. The example index is given as %{example}
     (presumably the iteration macro of the enclosing Loop Examples; confirm).
     Get Page later refers to this macro as %{href}. -->
<operator activated="true"
          class="extract_macro"
          compatibility="5.2.006"
          expanded="true"
          height="60"
          name="Extract Macro"
          width="90"
          x="45"
          y="30">
  <parameter key="macro" value="href"/>
  <parameter key="macro_type" value="data_value"/>
  <parameter key="attribute_name" value="Href"/>
  <parameter key="example_index" value="%{example}"/>
</operator>
<operator activated="true" class="handle_exception" compatibility="5.2.006" expanded="true" height="94" name="Handle Exception" width="90" x="246" y="30">
<parameter key="exception_macro" value="exceptionmsg"/>
<process expanded="true" height="386" width="279">
<!-- Get Page, first step inside Handle Exception.
     "url" is the macro reference %{href}; the macro is set by the
     Extract Macro operator that runs before Handle Exception in the same
     Loop Examples subprocess. -->
<operator activated="true"
          class="web:get_webpage"
          compatibility="5.2.000"
          expanded="true"
          height="60"
          name="Get Page"
          width="90"
          x="45"
          y="30">
  <parameter key="url" value="%{href}"/>
  <parameter key="random_user_agent" value="true"/>
  <!-- Both timeouts are set to the same value. -->
  <parameter key="connection_timeout" value="100000"/>
  <parameter key="read_timeout" value="100000"/>
  <parameter key="accept_cookies" value="all"/>
  <!-- No query parameters or request properties are configured. -->
  <list key="query_parameters"/>
  <list key="request_properties"/>
</operator>
<operator activated="true" class="text:extract_token_number" compatibility="5.2.002" expanded="true" height="60" name="Extract Token Number" width="90" x="45" y="120">
<parameter key="condition" value="matches"/>
<parameter key="regular_expression" value="LIBRAR"/>
</operator>
<operator activated="true" class="text:extract_token_number" compatibility="5.2.002" expanded="true" height="60" name="Extract Token Number (2)" width="90" x="45" y="210">
<parameter key="metadata_key" value="LIBRARnumber"/>
<parameter key="condition" value="matches"/>
<parameter key="regular_expression" value="LIBRAR"/>
<parameter key="invert_condition" value="true"/>
</operator>
<operator activated="true" class="text:documents_to_data" compatibility="5.2.002" expanded="true" height="76" name="Documents to Data (2)" width="90" x="179" y="210">
<parameter key="text_attribute" value="NewsletterDoc"/>
<parameter key="label_attribute" value="Href"/>
</operator>
<connect from_port="in 1" to_port="out 1"/>
<connect from_op="Get Page" from_port="output" to_op="Extract Token Number" to_port="document"/>
<connect from_op="Extract Token Number" from_port="document" to_op="Extract Token Number (2)" to_port="document"/>
<connect from_op="Extract Token Number (2)" from_port="document" to_op="Documents to Data (2)" to_port="documents 1"/>
<connect from_op="Documents to Data (2)" from_port="example set" to_port="out 2"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="source_in 3" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
</process>
<process expanded="true" height="375" width="202">
<connect from_port="in 1" to_port="out 1"/>
<connect from_port="in 2" to_port="out 2"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="source_in 3" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
</process>
</operator>
<connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_op="Handle Exception" to_port="in 1"/>
<connect from_op="Handle Exception" from_port="out 1" to_port="example set"/>
<connect from_op="Handle Exception" from_port="out 2" to_port="output 1"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_example set" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="flatten_collection" compatibility="5.2.006" expanded="true" height="60" name="Flatten Collection" width="90" x="246" y="255"/>
<operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store (2)" width="90" x="380" y="255">
<parameter key="repository_entry" value="GooglePagesStep3Docs"/>
</operator>
<operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store" width="90" x="246" y="165">
<parameter key="repository_entry" value="GooglePagesStep3Store"/>
</operator>
<connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
<connect from_op="Loop Examples" from_port="example set" to_op="Store" to_port="input"/>
<connect from_op="Loop Examples" from_port="output 1" to_op="Flatten Collection" to_port="collection"/>
<connect from_op="Flatten Collection" from_port="flat" to_op="Store (2)" to_port="input"/>
<connect from_op="Store (2)" from_port="through" to_port="result 2"/>
<connect from_op="Store" from_port="through" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
0