I see there are alot of topics on xpath, however I just lack the expereince in this field and really need some help.
The below is the first line of entries from a client feed, there are numerous entries after the 1st. For some reason I am only picking up the first entry and need help on the correct syntax or opperators to pull the entire feed into a MySQL database.
The client feed:
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:g="http://base.google.com/ns/1.0">
<entry>
<g:additional_image_link>http://www.client.co.za/cs/groups/public/documents/client.co.za_portal_webassets/file1518447240125huge</g:additional_image_link>
<g:availability>In Stock</g:availability>
<g:brand>Alcatel One Touch</g:brand>
<g:condition>new</g:condition>
<g:description>500MB 24 Month Data Top Up Price Plan</g:description>
<g:google_product_category>Electronics > Computers > Tablet Computers</g:google_product_category>
<g:gtin>DV8FY44</g:gtin>
<g:id>470654</g:id>
<g:link>http://www.client.co.za/vodacom/shopping/devices/alcatelonetouch-deals/client-smart-tab-2-3g/500mb-24-month-data-top-up-price-plan/470654</g:link>
<g:mpn>DV8FY44</g:mpn>
<g:price>129.00</g:price>
<g:product_type>Tablet</g:product_type>
<g:shipping>
<g:country>SA</g:country>
<g:price>R0.00</g:price>
<g:service>Free next day delivery</g:service>
</g:shipping>
<g:title>Smart Tab 2 3G on 500MB 24 Month Data Top Up Price Plan on a 24 month contract</g:title>
</entry>
</feed>
There would be a stack more entries beterrn the </entry> and </feed> lines.
Here is my RapidMiner process:
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="45" y="136">
<parameter key="url" value="http://www.client.co.za/client/GoogleDealFeed"/>
<parameter key="random_user_agent" value="true"/>
<parameter key="connection_timeout" value="9910000"/>
<parameter key="read_timeout" value="9910000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="all"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<operator activated="true" class="text:documents_to_data" compatibility="7.5.000" expanded="true" height="82" name="Documents to Data" width="90" x="179" y="136">
<parameter key="text_attribute" value="text"/>
<parameter key="label_attribute" value="text"/>
<parameter key="add_meta_information" value="false"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<operator activated="true" class="text:data_to_documents" compatibility="7.5.000" expanded="true" height="68" name="Data to Documents" width="90" x="313" y="136">
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<operator activated="true" class="loop_collection" compatibility="8.1.000" expanded="true" height="82" name="Loop Collection" width="90" x="447" y="136">
<parameter key="set_iteration_macro" value="true"/>
<parameter key="macro_name" value="iteration"/>
<parameter key="macro_start_value" value="1"/>
<parameter key="unfold" value="false"/>
<process expanded="true">
<operator activated="true" class="text:write_document" compatibility="7.5.000" expanded="true" height="82" name="Write Document" width="90" x="380" y="289">
<parameter key="overwrite" value="true"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
<operator activated="true" class="advanced_file_connectors:read_xml" compatibility="8.1.000" expanded="true" height="68" name="Read XML" width="90" x="581" y="289">
<parameter key="file" value="/Users/robinmeisel/Desktop/set-up.xml"/>
<parameter key="xpath_for_examples" value="//default:feed"/>
<enumeration key="xpaths_for_attributes">
<parameter key="xpath_for_attribute" value="default:entry[1]/g:availability/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:brand/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:condition/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:description/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:google_product_category/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:gtin/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:id/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:link/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:mpn/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:price/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:product_type/node()"/>
<parameter key="xpath_for_attribute" value="default:entry[1]/g:title/node()"/>
</enumeration>
<parameter key="use_namespaces" value="true"/>
<list key="namespaces">
<parameter key="g" value="http://base.google.com/ns/1.0"/>
<parameter key="default" value="http://www.w3.org/2005/Atom"/>
</list>
<parameter key="use_default_namespace" value="true"/>
<parameter key="default_namespace" value="http://www.w3.org/2005/Atom"/>
<parameter key="parse_numbers" value="false"/>
<parameter key="decimal_character" value="."/>
<parameter key="grouped_digits" value="false"/>
<parameter key="grouping_character" value=","/>
<parameter key="date_format" value=""/>
<list key="annotations"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="read_all_values_as_polynominal" value="false"/>
<list key="data_set_meta_data_information">
<parameter key="0" value="default:entry[1]/g:availability[*]/text().true.attribute_value.attribute"/>
<parameter key="1" value="default:entry[1]/g:brand[*]/text().true.attribute_value.attribute"/>
<parameter key="2" value="default:entry[1]/g:condition[*]/text().true.attribute_value.attribute"/>
<parameter key="3" value="default:entry[1]/g:description[*]/text().true.attribute_value.attribute"/>
<parameter key="4" value="default:entry[1]/g:google_product_category[*]/text().true.attribute_value.attribute"/>
<parameter key="5" value="default:entry[1]/g:gtin[*]/text().true.attribute_value.attribute"/>
<parameter key="6" value="default:entry[1]/g:id[1]/text().true.attribute_value.attribute"/>
<parameter key="7" value="default:entry[1]/g:link[1]/text().true.attribute_value.attribute"/>
<parameter key="8" value="default:entry[1]/g:mpn[1]/text().true.attribute_value.attribute"/>
<parameter key="9" value="default:entry[1]/g:price[1]/text().true.attribute_value.attribute"/>
<parameter key="10" value="default:entry[1]/g:product_type[1]/text().true.attribute_value.attribute"/>
<parameter key="11" value="default:entry[1]/g:title[1]/text().true.attribute_value.attribute"/>
</list>
<parameter key="read_not_matching_values_as_missings" value="true"/>
<parameter key="datamanagement" value="double_array"/>
<parameter key="data_management" value="auto"/>
</operator>
<connect from_port="single" to_op="Write Document" to_port="document"/>
<connect from_op="Write Document" from_port="file" to_op="Read XML" to_port="file"/>
<connect from_op="Read XML" from_port="output" to_port="output 1"/>
<portSpacing port="source_single" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
</process>
As you can see in the RapidMiner operators I have used xpath queries of
<parameter key="xpath_for_attribute" value="default:entry[1]/g:mpn/node()"/>
But that only returns the first character. What is the correct xpath to obtain this information from the feed?