Dear all,
I'm trying to set up a process to:
- loop through the PDF files in a folder (and its subfolders),
- extract the text from each PDF file,
- store the text in a database.
I created the following process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: walk C:\Literature (including subfolders), read every PDF
  as a document, run it through Process Documents (text kept, no word vector)
  and append the resulting example set to the "Docs" table of the "SQL"
  database connection.
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="loop_files" compatibility="5.3.015" expanded="true"
                height="60" name="Loop Files" width="90" x="112" y="30">
        <parameter key="directory" value="C:\Literature"/>
        <!-- Only hand PDF files to the inner process (case-insensitive match on
             the file name). Without a filter every file in the tree reaches
             Read Document; a non-PDF file is a likely cause of the EOF error
             (to be confirmed against the actual offending file). -->
        <parameter key="filter" value="(?i).*\.pdf"/>
        <!-- "recursive" is what makes the loop descend into subfolders. -->
        <parameter key="recursive" value="true"/>
        <!-- Must stay false: when true, the directories themselves are passed
             to the inner process as if they were files, and Read Document
             fails with "access denied" when it tries to open a folder. -->
        <parameter key="iterate_over_subdirs" value="false"/>
        <process expanded="true">
          <!-- Parse the current file object as a PDF document. -->
          <operator activated="true" class="text:read_document" compatibility="5.3.002" expanded="true"
                    height="60" name="Read Document" width="90" x="112" y="30">
            <parameter key="content_type" value="pdf"/>
          </operator>
          <!-- Turn the document into an example set; keep_text=true keeps the
               extracted text as an attribute, no word vector is created. -->
          <operator activated="true" class="text:process_documents" compatibility="5.3.002" expanded="true"
                    height="94" name="Process Documents" width="90" x="313" y="30">
            <parameter key="create_word_vector" value="false"/>
            <parameter key="keep_text" value="true"/>
            <process expanded="true">
              <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.3.002" expanded="true"
                        height="60" name="Generate n-Grams (Terms)" width="90" x="179" y="75"/>
              <connect from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
              <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <!-- Append one row per processed document to the target table. -->
          <operator activated="true" class="write_database" compatibility="5.3.015" expanded="true"
                    height="60" name="Write Database" width="90" x="447" y="30">
            <parameter key="connection" value="SQL"/>
            <parameter key="table_name" value="Docs"/>
            <parameter key="overwrite_mode" value="append"/>
          </operator>
          <connect from_port="file object" to_op="Read Document" to_port="file"/>
          <connect from_op="Read Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
          <connect from_op="Process Documents" from_port="example set" to_op="Write Database" to_port="input"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
        </process>
      </operator>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
But when executing I have the following errors:
If the "iterate over subdirs" parameter is enabled, it reports that it cannot access the file (access denied).

If that parameter is disabled, the "Read Document" operator throws an EOF (end-of-file) error instead.

Any ideas?