🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Problem extracting data

User: "dajaz27"
New Altair Community Member
Updated by Jocelyn
Hello, I am new to RapidMiner. I started out with a simple Craigslist scrape. However, I do not get any data back. Can someone please advise?


<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.008">
  <!-- RapidMiner process definition: one "Process Documents from Web" operator starting at the
       configured url, with an inner "Extract Information" operator that applies XPath queries
       to each document it receives. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="-20" width="-50">
      <operator activated="true" class="web:process_web" compatibility="5.2.003" expanded="true" height="60" name="Process Documents from Web" width="90" x="36" y="46">
        <parameter key="url" value="http://tampa.craigslist.org/cto"/>
        <!-- No crawling rules are configured in this list. -->
        <list key="crawling_rules"/>
        <parameter key="add_pages_as_attribute" value="true"/>
        <parameter key="domain" value="subtree"/>
        <parameter key="max_page_size" value="10000"/>
        <parameter key="user_agent" value="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"/>
        <parameter key="obey_robot_exclusion" value="false"/>
        <parameter key="really_ignore_exclusion" value="true"/>
        <process expanded="true" height="171" width="738">
          <operator activated="true" class="text:extract_information" compatibility="5.2.004" expanded="true" height="60" name="Extract Information" width="90" x="114" y="24">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <parameter key="attribute_type" value="Binominal"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!-- One entry per extracted attribute: key = attribute name, value = XPath query.
                 Both quote characters of every @id string literal are escaped exactly once.
                 Escaping the ampersand as well leaves literal entity text in the parsed value,
                 which produces an unbalanced, invalid XPath string literal.
                 NOTE(review): when the page is parsed as HTML, element steps may need a
                 namespace prefix to match anything; confirm against the operator documentation. -->
            <list key="xpath_queries">
              <parameter key="link" value="//*[@id=&quot;toc_rows&quot;]/p"/>
              <parameter key="price" value="//*[@id=&quot;toc_rows&quot;]/p[2]/span"/>
              <parameter key="location" value="//*[@id=&quot;toc_rows&quot;]/p[2]/span[6]/font"/>
              <parameter key="title" value="/html/body/article/section/h2"/>
              <parameter key="ad body" value="//*[@id=&quot;userbody&quot;]"/>
              <parameter key="postingid" value="/html/body/article/section/p"/>
              <parameter key="email" value="/html/body/article/section/section[1]/small/a"/>
            </list>
            <!-- NOTE(review): this list presumably maps a prefix to a namespace URI; the values
                 below look like XPath fragments instead. Left unchanged; verify whether these
                 entries are needed at all. -->
            <list key="namespaces">
              <parameter key="postingtitle" value="*[local-name(.) = 'postingtitle']"/>
              <parameter key="body" value="*[local-name(.) = 'userbody']"/>
              <parameter key="email" value="*[local-name(.) = 'small']"/>
            </list>
            <list key="index_queries"/>
          </operator>
          <!-- Inner wiring: incoming document into Extract Information, then out to the first sink. -->
          <connect from_port="document" to_op="Extract Information" to_port="document"/>
          <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Outer wiring: the operator's example set becomes the first process result. -->
      <connect from_op="Process Documents from Web" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>









Find more posts tagged with