🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Reading multiple text files from a folder and converting them to an exampleset?

User: "curious95"
New Altair Community Member
Updated by Jocelyn

I have a folder that contains multiple text files. How can we read the content of each file and use it as a row in the exampleset?

 

For example:

For a folder with 5 text files, the exampleset will look like this:

| Row | TextContent       |
| --- | ----------------- |
| 1   | contents of file1 |
| 2   | contents of file2 |
| 3   | contents of file3 |
| 4   | contents of file4 |
| 5   | contents of file5 |

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "greg_lorincz79"
    New Altair Community Member
    Accepted Answer

    I'm having trouble with the above process, since it only picks up the first line of every document. The files are emails in .txt format. How can I extract the whole body of each text?

     

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.2.000">
      <!-- Outer operator: Loop Files over the directory configured below.
           Its nested process receives each file through the "file object" port. -->
      <operator activated="true"
                class="concurrency:loop_files"
                compatibility="8.2.000"
                expanded="true"
                height="82"
                name="Loop Files"
                width="90"
                x="199"
                y="119">
        <parameter key="directory" value="/Users/alkopop79/Datasets/enron_sample"/>
        <parameter key="filter_type" value="glob"/>
        <parameter key="recursive" value="false"/>
        <parameter key="enable_macros" value="false"/>
        <parameter key="macro_for_file_name" value="file_name"/>
        <parameter key="macro_for_file_type" value="file_type"/>
        <parameter key="macro_for_folder_name" value="folder_name"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>

        <!-- Nested process executed by Loop Files: Read CSV followed by Store. -->
        <process expanded="true">
          <!-- Read CSV parses the incoming file object using ";" as the column separator.
               NOTE(review): csv_file is a fixed path to .DS_Store while the "file" port is
               also wired from the loop; confirm which of the two takes effect. -->
          <operator activated="true"
                    class="read_csv"
                    compatibility="8.2.000"
                    expanded="true"
                    height="68"
                    name="Read CSV"
                    width="90"
                    x="243"
                    y="262">
            <parameter key="csv_file" value="/Users/alkopop79/Datasets/enron_sample/.DS_Store"/>
            <parameter key="column_separators" value=";"/>
            <parameter key="trim_lines" value="false"/>
            <parameter key="use_quotes" value="true"/>
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="escape_character" value="\"/>
            <parameter key="skip_comments" value="false"/>
            <parameter key="comment_characters" value="#"/>
            <parameter key="parse_numbers" value="true"/>
            <parameter key="decimal_character" value="."/>
            <parameter key="grouped_digits" value="false"/>
            <parameter key="grouping_character" value=","/>
            <parameter key="date_format" value=""/>
            <parameter key="first_row_as_names" value="false"/>
            <!-- Row annotations: row 0 is annotated as "Name". -->
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="encoding" value="UTF-8"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <list key="data_set_meta_data_information"/>
            <parameter key="read_not_matching_values_as_missings" value="true"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>

          <!-- Store writes its input to the repository entry "enron_sample".
               NOTE(review): the entry name is fixed for every loop iteration; confirm
               that this is intended. -->
          <operator activated="true"
                    class="store"
                    compatibility="8.2.000"
                    expanded="true"
                    height="68"
                    name="Store"
                    width="90"
                    x="476"
                    y="268">
            <parameter key="repository_entry" value="enron_sample"/>
          </operator>

          <!-- Wiring: loop file object to Read CSV, Read CSV to Store, Store to loop output 1. -->
          <connect from_port="file object" to_op="Read CSV" to_port="file"/>
          <connect from_op="Read CSV" from_port="output" to_op="Store" to_port="input"/>
          <connect from_op="Store" from_port="through" to_port="output 1"/>

          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
    </process>