[SOLVED] Help with xml, xpath, namespaces.
cindyharper
New Altair Community Member
Below is sample XML from GoogleCSE API:
<?xml version="1.0" encoding="UTF-8"?>
<feed gd:kind="customsearch#search" xmlns="http://www.w3.org/2005/Atom" xmlns:cse="http://schemas.google.com/cseapi/2010" xmlns:gd="http://schemas.google.com/g/2005" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<title>Google Custom Search - Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu</title>
<id>tag:www.googleapis.com,2010-09-29:/customsearch/v1?q= Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu&cx=008033228147187897025:-ua_scxr1uc&num=7&start=1&safe=off</id>
<author>
<name>Library Website Search Engine - Google Custom Search</name>
</author>
<updated>1970-01-16T11:10:30.455Z</updated>
<opensearch:Url type="application/atom+xml" template="https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={cse:safe?}&cx={cse:cx?}&cref={cse:cref?}&sort={cse:sort?}&filter={cse:filter?}&gl={cse:gl?}&cr={cse:cr?}}&googlehost={cse:googleHost?}&c2coff={?cse:disableCnTwTranslation}&hq={cse:hq?}&hl={cse:hl?}&siteSearch={cse:siteSearch?}&siteSearchFilter={cse:siteSearchFilter?}&exactTerms={cse:exactTerms?}&excludeTerms={cse:excludeTerms?}&linkSite={cse:linkSite?}&orTerms={cse:orTerms?}&relatedSite={cse:relatedSite?}&dateRestrict={cse:dateRestrict?}&lowRange={cse:lowRange?}&highRange={cse:highRange?}&searchType={cse:searchType?}&fileType={cse:fileType?}&rights={cse:rights?}&imgsz={cse:imgsz?}&imgtype={cse:imgtype?}&imgc={cse:imgc?}&imgcolor={cse:imgcolor?}&alt=atom"/>
<opensearch:Query role="request" title="Google Custom Search - Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu" totalResults="7" searchTerms=" Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu" count="7" startIndex="1" inputEncoding="utf8" outputEncoding="utf8" cse:safe="off" cse:cx="008033228147187897025:-ua_scxr1uc"/>
<opensearch:totalResults>7</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<cse:context title="Library Website Search Engine"/>
<cse:searchInformation>
<cse:searchTime>0.073074</cse:searchTime>
<cse:formattedSearchTime>0.07</cse:formattedSearchTime>
<cse:totalResults>7</cse:totalResults>
<cse:formattedTotalResults>7</cse:formattedTotalResults>
</cse:searchInformation>
<cse:spelling>
<cse:correctedQuery type="html"/>
</cse:spelling>
<entry gd:kind="customsearch#result">
<id>http://www.albertus.edu/policy-reports/advancement-publications/documents/albertus-archive-october-2011-special-edition.pdf</id>
<updated>1970-01-16T11:10:30.455Z</updated>
<title type="html">Special Edition Athletics @lbertus <b>Newsletter</b></title>
<link href="http://www.albertus.edu/policy-reports/advancement-publications/documents/albertus-archive-october-2011-special-edition.pdf" title="www.albertus.edu"/>
<summary type="html">This weekend marks a busy and historic time on campus for the <b>Albertus</b>. <br> <b>Magnus College</b> Athletics Department as both the men&#39;s and women&#39;s soccer <b>...</b></summary>
<cse:cacheId>AJGUZgC9CVMJ</cse:cacheId>
<cse:mime>application/pdf</cse:mime>
<cse:fileFormat>PDF/Adobe Acrobat</cse:fileFormat>
<cse:formattedUrl type="html">www.<b>albertus.edu</b>/.../<b>albertus</b>-archive-october-2011-special-edition.pdf</cse:formattedUrl>
<cse:PageMap>
<cse:DataObject type="metatags">
<cse:Attribute name="creationdate" value="D:20111118135759-05'00'"/>
<cse:Attribute name="producer" value="Acrobat Web Capture 8.0"/>
<cse:Attribute name="moddate" value="D:20111118140743-05'00'"/>
<cse:Attribute name="title" value="Special Edition Athletics @lbertus Newsletter"/>
</cse:DataObject>
</cse:PageMap>
</entry>
...
</feed>
I'm using Generate Extract operator. I've specified the namespaces as:
<!-- Prefix-to-namespace-URI bindings passed to the Generate Extract operator.
     NOTE(review): the sample feed above declares xmlns="http://www.w3.org/2005/Atom" as its
     default namespace, while "x" is bound to a different URI here, so x:feed names an element
     in another namespace than the feed's. Confirm which URI was intended.
     NOTE(review): the cse/gd/opensearch keys include an "xmlns:" prefix; presumably the key
     should be the bare prefix used in the XPath. Verify against the operator documentation. -->
<list key="namespaces">
<parameter key="x" value="http://www.kbcafe.com/rss/atom.xsd.xml"/>
<parameter key="xmlns:cse" value="http://schemas.google.com/cseapi/2010"/>
<parameter key="xmlns:gd" value="http://schemas.google.com/g/2005"/>
<parameter key="xmlns:opensearch" value="http://a9.com/-/spec/opensearch/1.1/"/>
<parameter key="xx" value="xml"/>
</list>
I've tried to extract xpath such as
//x:feed
//feed
and more specific ones, but I can't seem to match anything in this feed. I'm sure the problem is in my namespaces, but I don't know where to look for the answer.
The targets I want to extract are
//x:feed/x:entry/x:title
and //x:feed/x:entry/x:link/@href.
Any help would be appreciated.
<?xml version="1.0" encoding="UTF-8"?>
<feed gd:kind="customsearch#search" xmlns="http://www.w3.org/2005/Atom" xmlns:cse="http://schemas.google.com/cseapi/2010" xmlns:gd="http://schemas.google.com/g/2005" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<title>Google Custom Search - Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu</title>
<id>tag:www.googleapis.com,2010-09-29:/customsearch/v1?q= Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu&cx=008033228147187897025:-ua_scxr1uc&num=7&start=1&safe=off</id>
<author>
<name>Library Website Search Engine - Google Custom Search</name>
</author>
<updated>1970-01-16T11:10:30.455Z</updated>
<opensearch:Url type="application/atom+xml" template="https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={cse:safe?}&cx={cse:cx?}&cref={cse:cref?}&sort={cse:sort?}&filter={cse:filter?}&gl={cse:gl?}&cr={cse:cr?}}&googlehost={cse:googleHost?}&c2coff={?cse:disableCnTwTranslation}&hq={cse:hq?}&hl={cse:hl?}&siteSearch={cse:siteSearch?}&siteSearchFilter={cse:siteSearchFilter?}&exactTerms={cse:exactTerms?}&excludeTerms={cse:excludeTerms?}&linkSite={cse:linkSite?}&orTerms={cse:orTerms?}&relatedSite={cse:relatedSite?}&dateRestrict={cse:dateRestrict?}&lowRange={cse:lowRange?}&highRange={cse:highRange?}&searchType={cse:searchType?}&fileType={cse:fileType?}&rights={cse:rights?}&imgsz={cse:imgsz?}&imgtype={cse:imgtype?}&imgc={cse:imgc?}&imgcolor={cse:imgcolor?}&alt=atom"/>
<opensearch:Query role="request" title="Google Custom Search - Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu" totalResults="7" searchTerms=" Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu" count="7" startIndex="1" inputEncoding="utf8" outputEncoding="utf8" cse:safe="off" cse:cx="008033228147187897025:-ua_scxr1uc"/>
<opensearch:totalResults>7</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<cse:context title="Library Website Search Engine"/>
<cse:searchInformation>
<cse:searchTime>0.073074</cse:searchTime>
<cse:formattedSearchTime>0.07</cse:formattedSearchTime>
<cse:totalResults>7</cse:totalResults>
<cse:formattedTotalResults>7</cse:formattedTotalResults>
</cse:searchInformation>
<cse:spelling>
<cse:correctedQuery type="html"/>
</cse:spelling>
<entry gd:kind="customsearch#result">
<id>http://www.albertus.edu/policy-reports/advancement-publications/documents/albertus-archive-october-2011-special-edition.pdf</id>
<updated>1970-01-16T11:10:30.455Z</updated>
<title type="html">Special Edition Athletics @lbertus <b>Newsletter</b></title>
<link href="http://www.albertus.edu/policy-reports/advancement-publications/documents/albertus-archive-october-2011-special-edition.pdf" title="www.albertus.edu"/>
<summary type="html">This weekend marks a busy and historic time on campus for the <b>Albertus</b>. <br> <b>Magnus College</b> Athletics Department as both the men&#39;s and women&#39;s soccer <b>...</b></summary>
<cse:cacheId>AJGUZgC9CVMJ</cse:cacheId>
<cse:mime>application/pdf</cse:mime>
<cse:fileFormat>PDF/Adobe Acrobat</cse:fileFormat>
<cse:formattedUrl type="html">www.<b>albertus.edu</b>/.../<b>albertus</b>-archive-october-2011-special-edition.pdf</cse:formattedUrl>
<cse:PageMap>
<cse:DataObject type="metatags">
<cse:Attribute name="creationdate" value="D:20111118135759-05'00'"/>
<cse:Attribute name="producer" value="Acrobat Web Capture 8.0"/>
<cse:Attribute name="moddate" value="D:20111118140743-05'00'"/>
<cse:Attribute name="title" value="Special Edition Athletics @lbertus Newsletter"/>
</cse:DataObject>
</cse:PageMap>
</entry>
...
</feed>
I'm using Generate Extract operator. I've specified the namespaces as:
<!-- Prefix-to-namespace-URI bindings passed to the Generate Extract operator.
     NOTE(review): the sample feed above declares xmlns="http://www.w3.org/2005/Atom" as its
     default namespace, while "x" is bound to a different URI here, so x:feed names an element
     in another namespace than the feed's. Confirm which URI was intended.
     NOTE(review): the cse/gd/opensearch keys include an "xmlns:" prefix; presumably the key
     should be the bare prefix used in the XPath. Verify against the operator documentation. -->
<list key="namespaces">
<parameter key="x" value="http://www.kbcafe.com/rss/atom.xsd.xml"/>
<parameter key="xmlns:cse" value="http://schemas.google.com/cseapi/2010"/>
<parameter key="xmlns:gd" value="http://schemas.google.com/g/2005"/>
<parameter key="xmlns:opensearch" value="http://a9.com/-/spec/opensearch/1.1/"/>
<parameter key="xx" value="xml"/>
</list>
I've tried to extract xpath such as
//x:feed
//feed
and more specific ones, but I can't seem to match anything in this feed. I'm sure the problem is in my namespaces, but I don't know where to look for the answer.
The targets I want to extract are
//x:feed/x:entry/x:title
and //x:feed/x:entry/x:link/@href.
Any help would be appreciated.
Tagged:
0
Answers
-
I've created a Google Custom Search Engine, which allows capturing Google search results. But I'm stuck on the Google Data and CSE schemas. I can grab the pages, but I can't seem to extract XPaths from them.0
-
Hi,
how are you trying to extract XPaths? Your current process setup and maybe some sample data would be useful to write a well-founded answer.
Best,
Marius0 -
Here's my process
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner 5.2 process as posted in the question. Pipeline, in connection order:
     Retrieve, Select Attributes, Replace, Generate Extract, Write CSV.
     Angle brackets inside attribute values are written as entities so that the file is
     well-formed XML; the parsed values are unchanged. -->
<process version="5.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <parameter key="logfile" value="C:\Users\charper\Documents\ALiNBUS\gpagesextract.txt"/>
    <process expanded="true" height="424" width="493">
      <!-- Loads the stored example set holding the fetched Google result pages. -->
      <operator activated="true" class="retrieve" compatibility="5.2.003" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//NewLocalRepository/ALinBus/GooglePages"/>
      </operator>
      <!-- Filters attributes by the regular expression Entry.*|Link.* with invert_selection=true;
           presumably this drops the matching attributes and keeps the rest. Confirm. -->
      <operator activated="true" class="select_attributes" compatibility="5.2.003" expanded="true" height="76" name="Select Attributes" width="90" x="98" y="107">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="attribute" value="EntryXML"/>
        <parameter key="regular_expression" value="Entry.*|Link.*"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Rewrites the GooglePage attribute: text matching xmlns.*\> is replaced by "/>".
           The post describes this as an attempt to strip the namespace declarations.
           NOTE(review): ".*" is greedy, so the match may run past the xmlns attributes, and the
           replacement turns the affected start tag into an empty element. Confirm the result. -->
      <operator activated="true" class="replace" compatibility="5.2.003" expanded="true" height="76" name="Replace" width="90" x="112" y="165">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="GooglePage"/>
        <parameter key="replace_what" value="xmlns.*\>"/>
        <parameter key="replace_by" value="/>"/>
      </operator>
      <!-- Runs queries of type XPath against the GooglePage attribute.
           NOTE(review): the sample feed places its elements in the default namespace
           http://www.w3.org/2005/Atom, whereas the XPath below is unprefixed and the "atom"
           prefix is bound to a local file path. This is the configuration the post reports as
           matching nothing; it is kept as posted. -->
      <operator activated="true" class="text:generate_extract" compatibility="5.2.001" expanded="true" height="60" name="Generate Extract" width="90" x="112" y="255">
        <parameter key="source_attribute" value="GooglePage"/>
        <parameter key="query_type" value="XPath"/>
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries">
          <parameter key="EntryXML1" value="\\&lt;entry.\\&lt;\\/entry\\&gt;"/>
        </list>
        <list key="xpath_queries">
          <parameter key="LinkTitle1" value="//entry/text()"/>
        </list>
        <list key="namespaces">
          <parameter key="atom" value="file:///c/Users/charper/Documents/ALiNBUS/schema-cse-feed-local.xsd"/>
        </list>
        <parameter key="ignore_CDATA" value="false"/>
        <list key="index_queries"/>
      </operator>
      <operator activated="true" class="write_csv" compatibility="5.2.003" expanded="true" height="76" name="Write CSV" width="90" x="297" y="229"/>
      <connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Replace" to_port="example set input"/>
      <connect from_op="Replace" from_port="example set output" to_op="Generate Extract" to_port="Example Set"/>
      <connect from_op="Generate Extract" from_port="Example Set" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 1"/>
      <connect from_op="Write CSV" from_port="file" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
My latest attempt was to take the import statements out of both the GooglePage attribute (see the Replace operator) and the .xsd. So the xsd looks like this:
I wasn't able to follow the import xsd links from the google output in my browser, so that's why I decided to try to dispense with them.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Local schema for the Google CSE feed, with the xs:import statements removed by the poster.
     The xs:schema start tag is left open (not self-closed) so that the declarations below are
     children of the root and the closing tag at the end matches.
     NOTE(review): the opensearch:, cse: and gd: prefixes used in ref attributes are not declared
     here and nothing imports their namespaces, so those references cannot resolve as written.
     NOTE(review): targetNamespace is a local file URI, while the sample feed's elements are in
     http://www.w3.org/2005/Atom. Confirm which namespace this schema is meant to describe. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="file:///c/Users/charper/Documents/ALiNBUS/schema-cse-feed-local.xsd">
  <!-- Root element: feed metadata followed by the result entry. -->
  <xs:element name="feed">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="title"/>
        <xs:element ref="id"/>
        <xs:element ref="author"/>
        <xs:element ref="updated"/>
        <xs:element ref="opensearch:Url"/>
        <xs:element ref="opensearch:Query"/>
        <xs:element ref="opensearch:totalResults"/>
        <xs:element ref="opensearch:startIndex"/>
        <xs:element ref="cse:context"/>
        <xs:element ref="cse:searchInformation"/>
        <xs:element ref="cse:spelling"/>
        <xs:element ref="entry"/>
      </xs:sequence>
      <xs:attribute ref="gd:kind" use="required"/>
    </xs:complexType>
  </xs:element>
  <xs:element name="author">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="name"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="name" type="xs:string"/>
  <!-- One search result. -->
  <xs:element name="entry">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="id"/>
        <xs:element ref="updated"/>
        <xs:element ref="title"/>
        <xs:element ref="link"/>
        <xs:element ref="summary"/>
        <xs:element ref="cse:cacheId"/>
        <xs:element ref="cse:mime"/>
        <xs:element ref="cse:fileFormat"/>
        <xs:element ref="cse:formattedUrl"/>
        <xs:element ref="cse:PageMap"/>
      </xs:sequence>
      <xs:attribute ref="gd:kind" use="required"/>
    </xs:complexType>
  </xs:element>
  <xs:element name="link">
    <xs:complexType>
      <xs:attribute name="href" use="required" type="xs:anyURI"/>
      <xs:attribute name="title" use="required" type="xs:NCName"/>
    </xs:complexType>
  </xs:element>
  <!-- summary and title are declared with mixed content. -->
  <xs:element name="summary">
    <xs:complexType mixed="true">
      <xs:attribute name="type" use="required" type="xs:NCName"/>
    </xs:complexType>
  </xs:element>
  <xs:element name="title">
    <xs:complexType mixed="true">
      <xs:attribute name="type" type="xs:NCName"/>
    </xs:complexType>
  </xs:element>
  <xs:element name="id" type="xs:anyURI"/>
  <xs:element name="updated" type="xs:dateTime"/>
</xs:schema>
0 -
Maybe what I should ask is: can someone point me to the proper use of the namespaces parameter in the Generate Extract operator?0
-
Hi,
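please have a look at the attached process. The trick is to prepend "atom:" to the element names in the XPath (//atom:entry instead of //entry) and to bind the atom prefix in the namespaces parameter to the namespace URI exactly as it is written in the XML data, i.e. the value of the feed's xmlns attribute (http://www.w3.org/2005/Atom).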
Best,
Marius
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.003">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
<parameter key="logverbosity" value="all"/>
<process expanded="true" height="536" width="705">
<operator activated="false" class="retrieve" compatibility="5.2.003" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
<parameter key="repository_entry" value="//NewLocalRepository/ALinBus/GooglePages"/>
</operator>
<operator activated="true" class="text:create_document" compatibility="5.2.001" expanded="true" height="60" name="Create Document" width="90" x="45" y="165">
<parameter key="text" value="<?xml version="1.0" encoding="UTF-8"?> <feed gd:kind="customsearch#search" xmlns="http://www.w3.org/2005/Atom" xmlns:cse="http://schemas.google.com/cseapi/2010" xmlns:gd="http://schemas.google.com/g/2005" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"> <title>Google Custom Search - Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu</title> <id>tag:www.googleapis.com,2010-09-29:/customsearch/v1?q= Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu&amp;cx=008033228147187897025:-ua_scxr1uc&amp;num=7&amp;start=1&amp;safe=off</id> <author> <name>Library Website Search Engine - Google Custom Search</name> </author> <updated>1970-01-16T11:10:30.455Z</updated> <opensearch:Url type="application/atom+xml" template="https://www.googleapis.com/customsearch/v1?q={searchTerms}&amp;num={count?}&amp;start={startIndex?}&amp;lr={language?}&amp;safe={cse:safe?}&amp;cx={cse:cx?}&amp;cref={cse:cref?}&amp;sort={cse:sort?}&amp;filter={cse:filter?}&amp;gl={cse:gl?}&amp;cr={cse:cr?}}&amp;googlehost={cse:googleHost?}&amp;c2coff={?cse:disableCnTwTranslation}&amp;hq={cse:hq?}&amp;hl={cse:hl?}&amp;siteSearch={cse:siteSearch?}&amp;siteSearchFilter={cse:siteSearchFilter?}&amp;exactTerms={cse:exactTerms?}&amp;excludeTerms={cse:excludeTerms?}&amp;linkSite={cse:linkSite?}&amp;orTerms={cse:orTerms?}&amp;relatedSite={cse:relatedSite?}&amp;dateRestrict={cse:dateRestrict?}&amp;lowRange={cse:lowRange?}&amp;highRange={cse:highRange?}&amp;searchType={cse:searchType?}&amp;fileType={cse:fileType?}&amp;rights={cse:rights?}&amp;imgsz={cse:imgsz?}&amp;imgtype={cse:imgtype?}&amp;imgc={cse:imgc?}&amp;imgcolor={cse:imgcolor?}&amp;alt=atom"/> <opensearch:Query role="request" title="Google Custom Search - Albertus Magnus College. library Albertus Magnus College Library intitle:newsletter albertus.edu" totalResults="7" searchTerms=" Albertus Magnus College. 
library Albertus Magnus College Library intitle:newsletter albertus.edu" count="7" startIndex="1" inputEncoding="utf8" outputEncoding="utf8" cse:safe="off" cse:cx="008033228147187897025:-ua_scxr1uc"/> <opensearch:totalResults>7</opensearch:totalResults> <opensearch:startIndex>1</opensearch:startIndex> <cse:context title="Library Website Search Engine"/> <cse:searchInformation> <cse:searchTime>0.073074</cse:searchTime> <cse:formattedSearchTime>0.07</cse:formattedSearchTime> <cse:totalResults>7</cse:totalResults> <cse:formattedTotalResults>7</cse:formattedTotalResults> </cse:searchInformation> <cse:spelling> <cse:correctedQuery type="html"/> </cse:spelling> <entry gd:kind="customsearch#result"> <id>http://www.albertus.edu/policy-reports/advancement-publications/documents/albertus-archive-october-2011-special-edition.pdf</id> <updated>1970-01-16T11:10:30.455Z</updated> <title type="html">Special Edition Athletics @lbertus &lt;b&gt;Newsletter&lt;/b&gt;</title> <link href="http://www.albertus.edu/policy-reports/advancement-publications/documents/albertus-archive-october-2011-special-edition.pdf" title="www.albertus.edu"/> <summary type="html">This weekend marks a busy and historic time on campus for the &lt;b&gt;Albertus&lt;/b&gt;. 
&lt;br&gt; &lt;b&gt;Magnus College&lt;/b&gt; Athletics Department as both the men&amp;#39;s and women&amp;#39;s soccer &lt;b&gt;...&lt;/b&gt;</summary> <cse:cacheId>AJGUZgC9CVMJ</cse:cacheId> <cse:mime>application/pdf</cse:mime> <cse:fileFormat>PDF/Adobe Acrobat</cse:fileFormat> <cse:formattedUrl type="html">www.&lt;b&gt;albertus.edu&lt;/b&gt;/.../&lt;b&gt;albertus&lt;/b&gt;-archive-october-2011-special-edition.pdf</cse:formattedUrl> <cse:PageMap> <cse:DataObject type="metatags"> <cse:Attribute name="creationdate" value="D:20111118135759-05&apos;00&apos;"/> <cse:Attribute name="producer" value="Acrobat Web Capture 8.0"/> <cse:Attribute name="moddate" value="D:20111118140743-05&apos;00&apos;"/> <cse:Attribute name="title" value="Special Edition Athletics @lbertus Newsletter"/> </cse:DataObject> </cse:PageMap> </entry> </feed>"/>
</operator>
<operator activated="true" class="text:documents_to_data" compatibility="5.2.001" expanded="true" height="76" name="Documents to Data" width="90" x="179" y="165">
<parameter key="text_attribute" value="GooglePage"/>
</operator>
<operator activated="true" class="text_to_nominal" compatibility="5.2.003" expanded="true" height="76" name="Text to Nominal" width="90" x="303" y="164"/>
<operator activated="false" class="select_attributes" compatibility="5.2.003" expanded="true" height="76" name="Select Attributes" width="90" x="180" y="30">
<parameter key="attribute_filter_type" value="regular_expression"/>
<parameter key="attribute" value="EntryXML"/>
<parameter key="regular_expression" value="Entry.*|Link.*"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="false" class="replace" compatibility="5.2.003" expanded="true" height="76" name="Replace" width="90" x="313" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="GooglePage"/>
<parameter key="replace_what" value="xmlns.*\>"/>
<parameter key="replace_by" value="/>"/>
</operator>
<!-- Runs queries of type XPath against the GooglePage attribute.
     The "atom" prefix is bound to http://www.w3.org/2005/Atom, the URI the feed declares as its
     default namespace, and the XPath addresses the entry element through that prefix.
     Angle brackets inside attribute values are written as entities so that the file is
     well-formed XML; the parsed values are unchanged. -->
<operator activated="true" class="text:generate_extract" compatibility="5.2.001" expanded="true" height="60" name="Generate Extract" width="90" x="447" y="30">
  <parameter key="source_attribute" value="GooglePage"/>
  <parameter key="query_type" value="XPath"/>
  <list key="string_machting_queries"/>
  <list key="regular_expression_queries">
    <parameter key="x" value="&lt;entry&gt;(.*)&lt;/entry&gt;"/>
  </list>
  <list key="regular_region_queries">
    <parameter key="EntryXML1" value="\\&lt;entry.\\&lt;\\/entry\\&gt;"/>
  </list>
  <list key="xpath_queries">
    <parameter key="LinkTitle1" value="//atom:entry"/>
  </list>
  <list key="namespaces">
    <parameter key="atom" value="http://www.w3.org/2005/Atom"/>
  </list>
  <parameter key="ignore_CDATA" value="false"/>
  <parameter key="assume_html" value="false"/>
  <list key="index_queries"/>
</operator>
<operator activated="false" class="write_csv" compatibility="5.2.003" expanded="true" height="76" name="Write CSV" width="90" x="585" y="30"/>
<connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Create Document" from_port="output" to_op="Documents to Data" to_port="documents 1"/>
<connect from_op="Documents to Data" from_port="example set" to_op="Text to Nominal" to_port="example set input"/>
<connect from_op="Text to Nominal" from_port="example set output" to_op="Generate Extract" to_port="Example Set"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Replace" to_port="example set input"/>
<connect from_op="Generate Extract" from_port="Example Set" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>0