Text Mining Processing from Data in two steps
Hi,
I’ve x webpage and like to analyze them. My analyzing goal is word occurrences in each sentence as window size. I use a 2-step approach where I collect all pages in a collection and split them in the first step in sentences. After a transpose of the ExampleSet and an assignment of the sentences column with the special attribute “text” with an R-script, I’d like to create a word vector for each sentence per web page. Here is my problem; my second Process from Data operator doesn’t create a word vector.
Has maybe someone an idea how I can solve my problem?
<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Crawler (2)" width="90" x="45" y="34">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Crawler Spon (2)" width="90" x="45" y="34">
<process expanded="true">
<operator activated="true" class="web:crawl_web_modern" compatibility="7.3.000" expanded="true" height="68" name="Crawl Web (2)" width="90" x="112" y="34">
<parameter key="url" value="http://www.spiegel.de"/>
<list key="crawling_rules">
<parameter key="store_with_matching_url" value=".+www.spiegel.+"/>
<parameter key="follow_link_with_matching_url" value=".+spiegel.+|.+de.+"/>
</list>
<parameter key="max_crawl_depth" value="10"/>
<parameter key="retrieve_as_html" value="true"/>
<parameter key="add_content_as_attribute" value="true"/>
<parameter key="max_pages" value="10"/>
<parameter key="max_page_size" value="100000"/>
<parameter key="delay" value="100"/>
<parameter key="max_concurrent_connections" value="200"/>
<parameter key="max_connections_per_host" value="100"/>
<parameter key="user_agent" value="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0"/>
</operator>
<operator activated="true" class="web:retrieve_webpages" compatibility="7.3.000" expanded="true" height="68" name="Get Pages (2)" width="90" x="246" y="34">
<parameter key="link_attribute" value="Link"/>
<parameter key="page_attribute" value="link"/>
<parameter key="random_user_agent" value="true"/>
</operator>
<connect from_op="Crawl Web (2)" from_port="example set" to_op="Get Pages (2)" to_port="Example Set"/>
<connect from_op="Get Pages (2)" from_port="Example Set" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (4)" width="90" x="246" y="34">
<parameter key="create_word_vector" value="false"/>
<parameter key="keep_text" value="true"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="web:extract_html_text_content" compatibility="7.3.000" expanded="true" height="68" name="Extract Content (2)" width="90" x="179" y="34">
<parameter key="minimum_text_block_length" value="10"/>
<parameter key="ignore_non_html_tags" value="false"/>
</operator>
<connect from_port="document" to_op="Extract Content (2)" to_port="document"/>
<connect from_op="Extract Content (2)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<connect from_op="Crawler Spon (2)" from_port="out 1" to_op="Process Documents from Data (4)" to_port="example set"/>
<connect from_op="Process Documents from Data (4)" from_port="example set" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="false" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve 120818 4" width="90" x="45" y="136">
<parameter key="repository_entry" value="../Data/120818 4"/>
</operator>
<operator activated="true" class="free_memory" compatibility="9.0.000" expanded="true" height="82" name="Free Memory" width="90" x="179" y="34"/>
<operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (3)" width="90" x="313" y="34">
<parameter key="attribute_name" value="text"/>
<list key="set_additional_roles">
<parameter key="Title" value="label"/>
</list>
</operator>
<operator activated="true" class="generate_id" compatibility="9.0.000" expanded="true" height="82" name="Generate ID (2)" width="90" x="447" y="34"/>
<operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="581" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="text|Title"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="715" y="34">
<list key="filters_list">
<parameter key="filters_entry_key" value="Title.is_not_missing."/>
</list>
<parameter key="filters_logic_and" value="false"/>
<parameter key="filters_check_metadata" value="false"/>
</operator>
<operator activated="true" class="operator_toolbox:group_into_collection" compatibility="1.3.000" expanded="true" height="82" name="Group Into Collection (2)" width="90" x="849" y="34">
<parameter key="group_by_attribute" value="id"/>
</operator>
<operator activated="true" class="loop_collection" compatibility="9.0.000" expanded="true" height="82" name="Loop Collection (2)" width="90" x="983" y="34">
<process expanded="true">
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (6)" width="90" x="112" y="34">
<parameter key="vector_creation" value="Term Frequency"/>
<parameter key="prune_below_percent" value="1.0"/>
<parameter key="prune_above_percent" value="100.0"/>
<parameter key="prune_below_absolute" value="20"/>
<parameter key="prune_above_absolute" value="2000"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Linguistic (5)" width="90" x="179" y="34">
<parameter key="mode" value="linguistic sentences"/>
<parameter key="language" value="German"/>
</operator>
<connect from_port="document" to_op="Tokenize Linguistic (5)" to_port="document"/>
<connect from_op="Tokenize Linguistic (5)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="transpose" compatibility="9.0.000" expanded="true" height="82" name="Transpose" width="90" x="246" y="34"/>
<operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename (2)" width="90" x="380" y="34">
<parameter key="old_name" value="id"/>
<parameter key="new_name" value="saetze"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="r_scripting:execute_r" compatibility="8.1.000" expanded="true" height="82" name="Execute R" width="90" x="514" y="34">
<parameter key="script" value="library("rpart") rm_main = function(data) { #labelEntry <- grep("label",metaData$data) #labelName <- names(metaData$data[labelEntry]) idEntry <- grep("id",metaData$data) idName <- names(metaData$data[idEntry]) saetze <- idName metaData$data$saetze <<- list(type="text", role="text") return(list(data=data)) }"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="648" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="saetze"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (5)" width="90" x="782" y="34">
<parameter key="prune_below_percent" value="1.0"/>
<parameter key="prune_above_percent" value="100.0"/>
<parameter key="prune_below_absolute" value="20"/>
<parameter key="prune_above_absolute" value="2000"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Non-letters (3)" width="90" x="45" y="34"/>
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize Linguistic (2)" width="90" x="179" y="34">
<parameter key="mode" value="linguistic sentences"/>
<parameter key="language" value="German"/>
</operator>
<operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (63)" width="90" x="313" y="34">
<parameter key="min_chars" value="2"/>
</operator>
<operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Adblocker (3)" width="90" x="447" y="34">
<process expanded="true">
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (64)" width="90" x="45" y="34">
<parameter key="string" value="nicht"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (65)" width="90" x="179" y="34">
<parameter key="string" value="SPIEGEL"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (66)" width="90" x="313" y="34">
<parameter key="string" value="ONLINE"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (67)" width="90" x="447" y="34">
<parameter key="string" value="Vermutlich"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (68)" width="90" x="581" y="34">
<parameter key="string" value="haben"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (69)" width="90" x="715" y="34">
<parameter key="string" value="Adblocker"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (70)" width="90" x="849" y="34">
<parameter key="string" value="aktiviert"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (71)" width="90" x="983" y="34">
<parameter key="string" value="Ausnahme"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (72)" width="90" x="1117" y="34">
<parameter key="string" value="hinzugefügt"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (73)" width="90" x="45" y="136">
<parameter key="string" value="prüfen"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (74)" width="90" x="179" y="136">
<parameter key="string" value="Erweiterungen"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (75)" width="90" x="313" y="136">
<parameter key="string" value="Do"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (76)" width="90" x="447" y="136">
<parameter key="string" value="not"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (77)" width="90" x="581" y="136">
<parameter key="string" value="Track"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (78)" width="90" x="715" y="136">
<parameter key="string" value="Funktionen"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (79)" width="90" x="849" y="136">
<parameter key="string" value="Inkognito"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (80)" width="90" x="983" y="136">
<parameter key="string" value="Modus"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (81)" width="90" x="1117" y="136">
<parameter key="string" value="finden"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (82)" width="90" x="45" y="238">
<parameter key="string" value="Werbung"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (83)" width="90" x="179" y="238">
<parameter key="string" value="unterdrücken"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (84)" width="90" x="313" y="238">
<parameter key="string" value="Informationen"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (85)" width="90" x="447" y="238">
<parameter key="string" value="Bedeutung"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (86)" width="90" x="581" y="238">
<parameter key="string" value="Browser"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (87)" width="90" x="715" y="238">
<parameter key="string" value="Sicherheit"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (88)" width="90" x="849" y="238">
<parameter key="string" value="Redaktion"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (89)" width="90" x="983" y="238">
<parameter key="string" value="arbeitet"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (90)" width="90" x="1117" y="238">
<parameter key="string" value="Fragen"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (91)" width="90" x="849" y="340">
<parameter key="string" value="Antworten"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (92)" width="90" x="983" y="340">
<parameter key="string" value="Netz"/>
<parameter key="invert condition" value="true"/>
</operator>
<operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (93)" width="90" x="1117" y="340">
<parameter key="string" value="de"/>
<parameter key="invert condition" value="true"/>
</operator>
<connect from_port="in 1" to_op="Filter Tokens (64)" to_port="document"/>
<connect from_op="Filter Tokens (64)" from_port="document" to_op="Filter Tokens (65)" to_port="document"/>
<connect from_op="Filter Tokens (65)" from_port="document" to_op="Filter Tokens (66)" to_port="document"/>
<connect from_op="Filter Tokens (66)" from_port="document" to_op="Filter Tokens (67)" to_port="document"/>
<connect from_op="Filter Tokens (67)" from_port="document" to_op="Filter Tokens (68)" to_port="document"/>
<connect from_op="Filter Tokens (68)" from_port="document" to_op="Filter Tokens (69)" to_port="document"/>
<connect from_op="Filter Tokens (69)" from_port="document" to_op="Filter Tokens (70)" to_port="document"/>
<connect from_op="Filter Tokens (70)" from_port="document" to_op="Filter Tokens (71)" to_port="document"/>
<connect from_op="Filter Tokens (71)" from_port="document" to_op="Filter Tokens (72)" to_port="document"/>
<connect from_op="Filter Tokens (72)" from_port="document" to_op="Filter Tokens (73)" to_port="document"/>
<connect from_op="Filter Tokens (73)" from_port="document" to_op="Filter Tokens (74)" to_port="document"/>
<connect from_op="Filter Tokens (74)" from_port="document" to_op="Filter Tokens (75)" to_port="document"/>
<connect from_op="Filter Tokens (75)" from_port="document" to_op="Filter Tokens (76)" to_port="document"/>
<connect from_op="Filter Tokens (76)" from_port="document" to_op="Filter Tokens (77)" to_port="document"/>
<connect from_op="Filter Tokens (77)" from_port="document" to_op="Filter Tokens (78)" to_port="document"/>
<connect from_op="Filter Tokens (78)" from_port="document" to_op="Filter Tokens (79)" to_port="document"/>
<connect from_op="Filter Tokens (79)" from_port="document" to_op="Filter Tokens (80)" to_port="document"/>
<connect from_op="Filter Tokens (80)" from_port="document" to_op="Filter Tokens (81)" to_port="document"/>
<connect from_op="Filter Tokens (81)" from_port="document" to_op="Filter Tokens (82)" to_port="document"/>
<connect from_op="Filter Tokens (82)" from_port="document" to_op="Filter Tokens (83)" to_port="document"/>
<connect from_op="Filter Tokens (83)" from_port="document" to_op="Filter Tokens (84)" to_port="document"/>
<connect from_op="Filter Tokens (84)" from_port="document" to_op="Filter Tokens (85)" to_port="document"/>
<connect from_op="Filter Tokens (85)" from_port="document" to_op="Filter Tokens (86)" to_port="document"/>
<connect from_op="Filter Tokens (86)" from_port="document" to_op="Filter Tokens (87)" to_port="document"/>
<connect from_op="Filter Tokens (87)" from_port="document" to_op="Filter Tokens (88)" to_port="document"/>
<connect from_op="Filter Tokens (88)" from_port="document" to_op="Filter Tokens (89)" to_port="document"/>
<connect from_op="Filter Tokens (89)" from_port="document" to_op="Filter Tokens (90)" to_port="document"/>
<connect from_op="Filter Tokens (90)" from_port="document" to_op="Filter Tokens (91)" to_port="document"/>
<connect from_op="Filter Tokens (91)" from_port="document" to_op="Filter Tokens (92)" to_port="document"/>
<connect from_op="Filter Tokens (92)" from_port="document" to_op="Filter Tokens (93)" to_port="document"/>
<connect from_op="Filter Tokens (93)" from_port="document" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="text:filter_stopwords_german" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (3)" width="90" x="581" y="34"/>
<connect from_port="document" to_op="Tokenize Non-letters (3)" to_port="document"/>
<connect from_op="Tokenize Non-letters (3)" from_port="document" to_op="Tokenize Linguistic (2)" to_port="document"/>
<connect from_op="Tokenize Linguistic (2)" from_port="document" to_op="Filter Tokens (63)" to_port="document"/>
<connect from_op="Filter Tokens (63)" from_port="document" to_op="Adblocker (3)" to_port="in 1"/>
<connect from_op="Adblocker (3)" from_port="out 1" to_op="Filter Stopwords (3)" to_port="document"/>
<connect from_op="Filter Stopwords (3)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="free_memory" compatibility="9.0.000" expanded="true" height="82" name="Free Memory (2)" width="90" x="916" y="34"/>
<connect from_port="single" to_op="Process Documents from Data (6)" to_port="example set"/>
<connect from_op="Process Documents from Data (6)" from_port="example set" to_op="Transpose" to_port="example set input"/>
<connect from_op="Transpose" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
<connect from_op="Rename (2)" from_port="example set output" to_op="Execute R" to_port="input 1"/>
<connect from_op="Execute R" from_port="output 1" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Process Documents from Data (5)" to_port="example set"/>
<connect from_op="Process Documents from Data (5)" from_port="example set" to_op="Free Memory (2)" to_port="through 1"/>
<connect from_op="Free Memory (2)" from_port="through 1" to_port="output 1"/>
<portSpacing port="source_single" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<connect from_op="Crawler (2)" from_port="out 1" to_op="Free Memory" to_port="through 1"/>
<connect from_op="Free Memory" from_port="through 1" to_op="Set Role (3)" to_port="example set input"/>
<connect from_op="Set Role (3)" from_port="example set output" to_op="Generate ID (2)" to_port="example set input"/>
<connect from_op="Generate ID (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
<connect from_op="Filter Examples (2)" from_port="example set output" to_op="Group Into Collection (2)" to_port="exa"/>
<connect from_op="Group Into Collection (2)" from_port="col" to_op="Loop Collection (2)" to_port="collection"/>
<connect from_op="Loop Collection (2)" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Best regards Tobias
Answers
-
hi @TobiasNehrig - sorry this took so long. So Process Documents will not normally look at special attributes - you have to either ensure that the attribute is regular or "force it" via the select attributes and weights parameter:
force it to take your attributedelete the role from "text" in your R script
Then it works fine:
Scott
1