/* Primary navigation: no padding on the nav; every ul inside it is a full-width horizontal flex row without bullets. */
nav[aria-label="Primary Navigation"] {
  padding: 0;

  & ul {
    list-style: none;
    width: 100%;
    display: flex;
    flex-direction: row;
    justify-content: start;
    align-items: start;
    gap: 30px;
    padding: 0;

    /* List items carry no margin of their own; spacing comes from the flex gap above. */
    & li {
      margin: 0;
    }

    /* Items of nested lists are bullet-free as well. */
    & ul li {
      list-style: none;
    }
  }
}
Siemens Community Catalyst Program

The Siemens Community Catalyst program was co-created with our community to acknowledge technology leaders who consistently contribute to the Siemens Community. Nominations are accepted on a rolling basis.
Nominate Now
⚠️Please Note

Technical discussions have been migrated to the Siemens Support Center as Knowledge Base (KB) articles; please note that this content is no longer maintained and may be outdated, so for the latest information, log in to the Siemens Support Center, search online, or contact our support team.
Search for Content in Siemens Support Center
create multiple neighborhood co-occurrence graphs

TobiasNehrig
Hi,
I would like to create multiple neighborhood co-occurrence graphs (max. 2000).
My Example Set from the Execute R operators looks like this:
Row No. | Document | Word1 | Word2 | n
1 aaa bbb 2
1 bbb ddd 3
1 aaa bbb 4
2 aaa ccc 3
2 aaa bbb 4
2 ccc aaa 3
Now, for each Document, I'd like to create a graph where the vertices are represented by the words and the edges by n.
How can I separate or divide the Example Set for each Document, so that I can create the graphs and save them?
My Process looks like:
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Crawler: runs "Crawl Web" from http://www.spiegel.de, feeds its example set into
           "Get Pages" and delivers the result of "Get Pages" on port "out 1". -->
      <operator name="Crawler" class="subprocess" compatibility="8.0.001"
                activated="true" expanded="true"
                height="82" width="90" x="45" y="289">
        <process expanded="true">
          <!-- Crawl settings: max_crawl_depth 10, max_pages 15, delay 100, max_concurrent_connections 50;
               page content is added as an attribute.
               NOTE(review): the second alternative of follow_link_with_matching_url matches any URL that
               has "de" between other characters, so off-site links are presumably followed too; confirm. -->
          <operator name="Crawl Web" class="web:crawl_web_modern" compatibility="7.3.000"
                    activated="true" expanded="true"
                    height="68" width="90" x="112" y="34">
            <parameter key="url" value="http://www.spiegel.de"/>
            <list key="crawling_rules">
              <parameter key="store_with_matching_url" value=".+www.spiegel.+"/>
              <parameter key="follow_link_with_matching_url" value=".+spiegel.+|.+de.+"/>
            </list>
            <parameter key="max_crawl_depth" value="10"/>
            <parameter key="retrieve_as_html" value="true"/>
            <parameter key="add_content_as_attribute" value="true"/>
            <parameter key="max_pages" value="15"/>
            <parameter key="delay" value="100"/>
            <parameter key="max_concurrent_connections" value="50"/>
            <parameter key="user_agent" value="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0"/>
          </operator>
          <!-- Configured with link_attribute "Link" and page_attribute "link"; "link" is the attribute
               that "Process Documents from Data" later lists under specify_weights. -->
          <operator name="Get Pages" class="web:retrieve_webpages" compatibility="7.3.000"
                    activated="true" expanded="true"
                    height="68" width="90" x="246" y="34">
            <parameter key="link_attribute" value="Link"/>
            <parameter key="page_attribute" value="link"/>
            <parameter key="random_user_agent" value="true"/>
          </operator>
          <connect from_op="Crawl Web" from_port="example set" to_op="Get Pages" to_port="Example Set"/>
          <connect from_op="Get Pages" from_port="Example Set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Process Documents from Data: no word vector is created (create_word_vector=false) and the
           processed text is kept (keep_text=true); only attribute "link" is listed under specify_weights.
           Inner chain per document: Extract Content, Tokenize Token, Filter Tokens a-zA-Z, Transform Cases. -->
      <operator name="Process Documents from Data" class="text:process_document_from_data" compatibility="7.5.000"
                activated="true" expanded="true"
                height="82" width="90" x="45" y="136">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="by ranking"/>
        <parameter key="prune_below_absolute" value="10"/>
        <parameter key="prune_above_absolute" value="3000"/>
        <parameter key="data_management" value="memory-optimized"/>
        <parameter key="select_attributes_and_weights" value="true"/>
        <list key="specify_weights">
          <parameter key="link" value="1.0"/>
        </list>
        <process expanded="true">
          <!-- Extracts text from the HTML; minimum_text_block_length is 2. -->
          <operator name="Extract Content" class="web:extract_html_text_content" compatibility="7.3.000"
                    activated="true" expanded="true"
                    height="68" width="90" x="45" y="34">
            <parameter key="minimum_text_block_length" value="2"/>
          </operator>
          <!-- Tokenizer in mode "linguistic tokens" with language "German". -->
          <operator name="Tokenize Token" class="text:tokenize" compatibility="7.5.000"
                    activated="true" expanded="true"
                    height="68" width="90" x="179" y="34">
            <parameter key="mode" value="linguistic tokens"/>
            <parameter key="language" value="German"/>
          </operator>
          <!-- Token filter with condition "matches" on [a-zA-Z]+.
               NOTE(review): the character class has no umlauts or sharp s, so German tokens containing
               them presumably do not pass this filter; confirm that this is intended. -->
          <operator name="Filter Tokens a-zA-Z" class="text:filter_tokens_by_content" compatibility="7.5.000"
                    activated="true" expanded="true"
                    height="68" width="90" x="313" y="34">
            <parameter key="condition" value="matches"/>
            <parameter key="regular_expression" value="[a-zA-Z]+"/>
          </operator>
          <!-- Case transformation with the operator's default parameters (none are set here). -->
          <operator name="Transform Cases" class="text:transform_cases" compatibility="7.5.000"
                    activated="true" expanded="true"
                    height="68" width="90" x="447" y="34"/>
          <connect from_port="document" to_op="Extract Content" to_port="document"/>
          <connect from_op="Extract Content" from_port="document" to_op="Tokenize Token" to_port="document"/>
          <connect from_op="Tokenize Token" from_port="document" to_op="Filter Tokens a-zA-Z" to_port="document"/>
          <connect from_op="Filter Tokens a-zA-Z" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Splitting: keeps attribute "text", generates an id that is renamed to "Document", sets the role
           of "text" to label, renames "text" to "word" and splits "word" on the pattern \s+.
           Port "out 1" carries the transposed split result, port "out 2" the "original" port of Split. -->
      <operator name="Splitting" class="subprocess" compatibility="8.0.001"
                activated="true" expanded="true"
                height="103" width="90" x="45" y="34">
        <process expanded="true">
          <!-- Subset selection of the single attribute "text" (special attributes included). -->
          <operator name="Select Attributes" class="select_attributes" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="text"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Generates the id that the next operator renames to "Document". -->
          <operator name="Generate ID" class="generate_id" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="45" y="136"/>
          <operator name="Rename ID" class="rename" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="45" y="238">
            <parameter key="old_name" value="id"/>
            <parameter key="new_name" value="Document"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Gives "text" the role label. -->
          <operator name="Set Role" class="set_role" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="45" y="340">
            <parameter key="attribute_name" value="text"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- "text" becomes "word", the column name the R script in "Generate Bigrams" reads. -->
          <operator name="Rename" class="rename" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="179" y="34">
            <parameter key="old_name" value="text"/>
            <parameter key="new_name" value="word"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Splits "word" on whitespace runs (\s+). -->
          <operator name="Split" class="split" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="179" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="word"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="split_pattern" value="\s+"/>
          </operator>
          <!-- Transposes the split example set before it leaves on "out 1". -->
          <operator name="Splitting Output" class="transpose" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="82" width="90" x="313" y="34"/>
          <connect from_port="in 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Rename ID" to_port="example set input"/>
          <connect from_op="Rename ID" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_op="Splitting Output" to_port="example set input"/>
          <connect from_op="Split" from_port="original" to_port="out 2"/>
          <connect from_op="Splitting Output" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="124" name="Neighborhood co-ocurrence" width="90" x="246" y="85">
        <process expanded="true">
          <!-- Generate Bigrams (Execute R): the script calls tidytext unnest_tokens on column "word" with
               token = "ngrams", n = 2, writing the result to a column "bigram"; it prints the table and
               returns it as a one-element list. -->
          <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Generate Bigrams" width="90" x="45" y="34">
            <parameter key="script" value="rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#10;&#9;spon_bigrams &lt;- data %&gt;%&#10;&#9;  unnest_tokens(bigram, word, token = &quot;ngrams&quot;, n = 2)&#10;&#9;print(spon_bigrams)&#10;&#10;    return(list(spon_bigrams))    &#10;}&#10;"/>
          </operator>
          <!-- Seperate Bigrams (Execute R): the script splits column "bigram" on a single space into
               "word1" and "word2", then removes rows where word1 or word2 is in stopwords("de"), and
               returns the filtered table as a one-element list.
               NOTE(review): stopwords() is presumably taken from the tokenizers package loaded in the
               script; verify that the installed version still provides it. -->
          <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Seperate Bigrams" width="90" x="45" y="136">
            <parameter key="script" value="rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;&#9;library(tokenizers)&#10;&#10;&#9;devided_bigrams &lt;-data %&gt;%&#10;&#9; separate(bigram, c(&quot;word1&quot;, &quot;word2&quot;), sep = &quot; &quot;)&#10;&#9; print(devided_bigrams)&#10;&#10;&#9;bigrams_filtered &lt;- devided_bigrams %&gt;%&#10;&#9; filter(!word1 %in% stopwords(&quot;de&quot;)) %&gt;%&#10;&#9; filter(!word2 %in% stopwords(&quot;de&quot;))&#10;&#10;    return(list(bigrams_filtered))&#10;    &#10;}&#10;"/>
          </operator>
          <!-- Multiply: its "output 1" feeds "Count Bigrams per Page" and its "output 2" feeds
               "Count all Bigrams" (see the connect elements of this subprocess). -->
          <operator name="Seperated Bigrams" class="multiply" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="103" width="90" x="45" y="238"/>
          <!-- Count Bigrams per Page (Execute R): the script calls dplyr count on (Document, word1, word2)
               with sort = TRUE, prints the result, converts it with data.frame() and returns that
               data frame directly (not wrapped in a list). -->
          <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Count Bigrams per Page" width="90" x="179" y="34">
            <parameter key="script" value="rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;&#10;&#9;count_bigrams_per_page &lt;- data %&gt;%&#10;&#9;  count(Document, word1, word2, sort = TRUE)&#10;&#9;print(count_bigrams_per_page)&#10;&#10;&#9;counted_bigrams_per_page &lt;- data.frame(count_bigrams_per_page)&#10;&#10;    return(counted_bigrams_per_page)&#10;}&#10;"/>
          </operator>
          <!-- Count all Bigrams (Execute R): the script calls dplyr count on (word1, word2) with
               sort = TRUE, i.e. without the Document column, prints the result, converts it with
               data.frame() and returns that data frame directly (not wrapped in a list). -->
          <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Count all Bigrams" width="90" x="179" y="136">
            <parameter key="script" value="rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;&#10;&#9;count_bigrams &lt;- data %&gt;%&#10;&#9;  count(word1, word2, sort = TRUE)&#10;&#9;print(count_bigrams)&#10;&#10;&#9;counted_bigrams &lt;- data.frame(count_bigrams)&#10;   &#10;    return(counted_bigrams)&#10;}&#10;"/>
          </operator>
          <!-- Multiply: its "output 1" feeds "Visual Bigrams" and its "output 2" leaves the subprocess
               on "out 2" (see the connect elements of this subprocess). -->
          <operator name="Counted Bigrams per Page" class="multiply" compatibility="8.0.001"
                    activated="true" expanded="true"
                    height="103" width="90" x="313" y="34"/>
          <!-- Visual Bigrams (Execute R): keeps bigrams with n >= 5, builds an igraph graph, draws it
               with ggraph (layout "fr", fixed seed 2017), writes /home/knecht/imagetest002.png and
               returns the plot object as a one-element list.
               Fix: graph_from_data_frame reads the edge list from the FIRST TWO columns. The input comes
               from "Count Bigrams per Page" and also carries a Document column, so the script now selects
               word1, word2, n explicitly; word1 and word2 form the edges and n becomes an edge attribute.
               Previously the edge endpoints depended on the column order of the incoming table.
               NOTE(review): all Documents are still drawn into one graph. For one graph per Document the
               table would have to be split by Document first and this script run once per subset. -->
          <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Visual Bigrams" width="90" x="447" y="34">
            <parameter key="script" value="# not finished&#10;&#10;rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;     library(igraph)&#10;&#10;     bigram_graph &lt;- data %&gt;%&#10;       filter(n &gt;= 5) %&gt;%&#10;       dplyr::select(word1, word2, n) %&gt;%&#10;       graph_from_data_frame&#10;      print(bigram_graph)&#10;    &#9;# bigram_graph &lt;- data.frame(bigram_graph)&#10;&#10;    &#9;library(ggraph)&#10;    &#9;set.seed(2017)&#10;&#10;    &#9;graph &lt;- ggraph(bigram_graph, layout = &quot;fr&quot;) +&#10;    &#9;  geom_edge_link() +&#10;    &#9;  geom_node_point() +&#10;    &#9;  geom_node_text(aes(label = name), vjust = 1, hjust =1)&#10;&#9;print(graph)&#10;    &#9;setwd(&quot;/home/knecht&quot;)&#10;    &#9;png(filename=&quot;imagetest002.png&quot;)&#10;    &#9;plot(graph)&#10;    &#9;dev.off()&#10;    &#9;    &#9;&#10;     return(list(graph))&#10;}&#10;"/>
          </operator>
          <!-- Analyse specific Words (currently deactivated: activated="false"): duplicates its input,
               keeps the bigrams whose word1 (upper branch) or word2 (lower branch) equals "umwelt",
               counts them per (word1, word2) and draws one ggraph plot per branch.
               Ports: out 1 / out 3 carry the plots, out 2 / out 4 the filtered count tables.
               Fix: both "Visual Word" scripts ended with return(list(ggraph)), which returns the ggraph
               FUNCTION rather than the plot stored in variable "graph"; they now return list(graph),
               matching what the sibling "Visual Bigrams" script returns. -->
          <operator activated="false" class="subprocess" compatibility="8.0.001" expanded="true" height="145" name="Analyse specific Words" width="90" x="313" y="187">
            <process expanded="true">
              <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="85"/>
              <!-- Rows with word1 == "umwelt", counted per (word1, word2), returned as a data frame. -->
              <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Filter Word 1" width="90" x="179" y="34">
                <parameter key="script" value="rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;&#10;&#9;bigrams_filtered_word1 &lt;- data %&gt;%&#10;&#9;  #count(word1, word2, sort = TRUE)&#10;&#9;  filter(word1 == &quot;umwelt&quot;) %&gt;%&#10;&#9;  count(word1, word2, sort =TRUE)&#10;&#9;  print(bigrams_filtered_word1)&#10;   &#9;bigrams_filtered_word1 &lt;- data.frame(bigrams_filtered_word1)&#10;    return(bigrams_filtered_word1)&#10;}&#10;"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Word1 Filter" width="90" x="313" y="34"/>
              <!-- Builds the graph from the word1 filter result, writes it with png() to the file
                   "filtergraphtest001word1" in /home/knecht and returns the plot object. -->
              <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Visual Word 1" width="90" x="447" y="34">
                <parameter key="script" value="# not finished&#10;&#10;rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;     library(igraph)&#10;&#10;     bigram_graph &lt;- data %&gt;%&#10;       #filter(n &gt;= 6) %&gt;%&#10;       graph_from_data_frame&#10;      print(bigram_graph)&#10;    &#9;# bigram_graph &lt;- data.frame(bigram_graph)&#10;&#10;    &#9;library(ggraph)&#10;    &#9;set.seed(2017)&#10;&#10;    &#9;graph &lt;- ggraph(bigram_graph, layout = &quot;fr&quot;) +&#10;    &#9;  geom_edge_link() +&#10;    &#9;  geom_node_point() +&#10;    &#9;  geom_node_text(aes(label = name), vjust = 1, hjust =1)&#10;&#10;    &#9;setwd(&quot;/home/knecht&quot;)&#10;    &#9;#write.csv(graph, '/home/knecht/filtergraphtestWord1001')&#10;    &#9;png(filename=&quot;filtergraphtest001word1&quot;)&#10;    &#9;plot(graph)&#10;    &#9;dev.off()&#10;    &#9;    &#9;&#10;     return(list(graph))&#10;}&#10;"/>
              </operator>
              <!-- Rows with word2 == "umwelt", counted per (word1, word2), returned as a data frame. -->
              <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Filter Word 2" width="90" x="179" y="187">
                <parameter key="script" value="rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;&#10;&#9;bigrams_filtered &lt;- data %&gt;%&#10;&#9;  #count(word1, word2, sort = TRUE)&#10;&#9;  filter(word2 == &quot;umwelt&quot;) %&gt;%&#10;&#9;  count(word1, word2, sort =TRUE)&#10;&#9;  print(bigrams_filtered)&#10;   &#9;bigrams_filtered &lt;- data.frame(bigrams_filtered)&#10;    return(bigrams_filtered)&#10;}&#10;"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Word2 Filter" width="90" x="313" y="187"/>
              <!-- Builds the graph from the word2 filter result, writes it with png() to the file
                   "filtergraphtestword2001" in /home/knecht and returns the plot object. -->
              <operator activated="true" class="r_scripting:execute_r" compatibility="7.2.000" expanded="true" height="82" name="Visual Word 2" width="90" x="447" y="187">
                <parameter key="script" value="# not finished&#10;&#10;rm_main = function(data)&#10;{&#10;&#9;library(dplyr)&#10;&#9;library(tidytext)&#10;&#9;library(tidyr)&#10;     library(igraph)&#10;&#10;     bigram_graph &lt;- data %&gt;%&#10;       #filter(n &gt;= 6) %&gt;%&#10;       graph_from_data_frame&#10;      print(bigram_graph)&#10;    &#9;# bigram_graph &lt;- data.frame(bigram_graph)&#10;&#10;    &#9;library(ggraph)&#10;    &#9;set.seed(2017)&#10;&#10;    &#9;graph &lt;- ggraph(bigram_graph, layout = &quot;fr&quot;) +&#10;    &#9;  geom_edge_link() +&#10;    &#9;  geom_node_point() +&#10;    &#9;  geom_node_text(aes(label = name), vjust = 1, hjust =1)&#10;&#10;    &#9;setwd(&quot;/home/knecht&quot;)&#10;    &#9;#write.csv(graph, '/home/knecht/filtergraphtestword2001')&#10;    &#9;png(filename=&quot;filtergraphtestword2001&quot;)&#10;    &#9;plot(graph)&#10;    &#9;dev.off()&#10;    &#9;    &#9;&#10;     return(list(graph))&#10;}&#10;"/>
              </operator>
              <connect from_port="in 1" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Filter Word 1" to_port="input 1"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Filter Word 2" to_port="input 1"/>
              <connect from_op="Filter Word 1" from_port="output 1" to_op="Word1 Filter" to_port="input"/>
              <connect from_op="Word1 Filter" from_port="output 1" to_op="Visual Word 1" to_port="input 1"/>
              <connect from_op="Word1 Filter" from_port="output 2" to_port="out 2"/>
              <connect from_op="Visual Word 1" from_port="output 1" to_port="out 1"/>
              <connect from_op="Filter Word 2" from_port="output 1" to_op="Word2 Filter" to_port="input"/>
              <connect from_op="Word2 Filter" from_port="output 1" to_op="Visual Word 2" to_port="input 1"/>
              <connect from_op="Word2 Filter" from_port="output 2" to_port="out 4"/>
              <connect from_op="Visual Word 2" from_port="output 1" to_port="out 3"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
              <portSpacing port="sink_out 4" spacing="0"/>
              <portSpacing port="sink_out 5" spacing="0"/>
            </process>
          </operator>
          <!-- Wiring of this subprocess:
               in 1 to Generate Bigrams to Seperate Bigrams to the multiply "Seperated Bigrams";
               its output 1 goes to Count Bigrams per Page and on to the multiply "Counted Bigrams per Page",
               whose output 1 goes to Visual Bigrams (delivered on out 1) and whose output 2 is out 2;
               output 2 of "Seperated Bigrams" goes to Count all Bigrams, delivered on out 3.
               No connection here references the operator "Analyse specific Words". -->
          <connect from_port="in 1" to_op="Generate Bigrams" to_port="input 1"/>
          <connect from_op="Generate Bigrams" from_port="output 1" to_op="Seperate Bigrams" to_port="input 1"/>
          <connect from_op="Seperate Bigrams" from_port="output 1" to_op="Seperated Bigrams" to_port="input"/>
          <connect from_op="Seperated Bigrams" from_port="output 1" to_op="Count Bigrams per Page" to_port="input 1"/>
          <connect from_op="Seperated Bigrams" from_port="output 2" to_op="Count all Bigrams" to_port="input 1"/>
          <connect from_op="Count Bigrams per Page" from_port="output 1" to_op="Counted Bigrams per Page" to_port="input"/>
          <connect from_op="Count all Bigrams" from_port="output 1" to_port="out 3"/>
          <connect from_op="Counted Bigrams per Page" from_port="output 1" to_op="Visual Bigrams" to_port="input 1"/>
          <connect from_op="Counted Bigrams per Page" from_port="output 2" to_port="out 2"/>
          <connect from_op="Visual Bigrams" from_port="output 1" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
          <portSpacing port="sink_out 4" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: Crawler (out 1) to Process Documents from Data to Splitting (in 1).
           Splitting out 1 is delivered as result 1; Splitting out 2 feeds "Neighborhood co-ocurrence",
           whose out 1, out 2 and out 3 are delivered as result 2, result 3 and result 4. -->
      <connect from_op="Crawler" from_port="out 1" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Splitting" to_port="in 1"/>
      <connect from_op="Splitting" from_port="out 1" to_port="result 1"/>
      <connect from_op="Splitting" from_port="out 2" to_op="Neighborhood co-ocurrence" to_port="in 1"/>
      <connect from_op="Neighborhood co-ocurrence" from_port="out 1" to_port="result 2"/>
      <connect from_op="Neighborhood co-ocurrence" from_port="out 2" to_port="result 3"/>
      <connect from_op="Neighborhood co-ocurrence" from_port="out 3" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
Maybe there is someone who can give me a hint.
Regards
Tobias
Find more posts tagged with

AI Studio
Comments

There are no comments yet