🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Interpreting Extracting Topics from Data (LDA)

User: "FreeThoughts"
New Altair Community Member
Updated by Jocelyn
Hi im currently working with the LDA operator from the Operator took box after accomplishing extracting the data I wanted to properly interpret the data. Was wondering if you could help me my code is shown below. The issue I have understanding it what words fall under what specific topic without slowly having to analysis by hand. As well as the open visualization for each topic

<?xml version="1.0" encoding="UTF-8"?><process version="9.4.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.4.001" expanded="true" height="68" name="Retrieve Practice LDA" width="90" x="45" y="85">
        <parameter key="repository_entry" value="//Local Repository/data/Practice LDA"/>
      </operator>
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="246" y="85">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="Binary Term Occurrences"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_below_percent" value="1.35"/>
        <parameter key="prune_above_percent" value="100.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="true"/>
        <list key="specify_weights">
          <parameter key="LinguisticSentence" value="1.0"/>
        </list>
        <process expanded="true">
          <operator activated="true" class="text:transform_cases" compatibility="8.2.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="45" y="34">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <operator activated="true" class="text:tokenize" compatibility="8.2.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="179" y="34">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <operator activated="true" class="open_file" compatibility="9.4.001" expanded="true" height="68" name="Open File" width="90" x="313" y="187">
            <parameter key="resource_type" value="file"/>
            <parameter key="filename" value="C:\Users\Christian\Downloads\Stopwords.xlsx"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="8.2.000" expanded="true" height="68" name="Filter Tokens (by Length) (2)" width="90" x="313" y="34">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="100"/>
          </operator>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="8.2.000" expanded="true" height="82" name="Filter Stopwords (Dictionary) (2)" width="90" x="447" y="34">
            <parameter key="case_sensitive" value="false"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.2.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="581" y="34"/>
          <operator activated="true" class="text:stem_porter" compatibility="8.2.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="715" y="34"/>
          <connect from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Tokens (by Length) (2)" to_port="document"/>
          <connect from_op="Open File" from_port="file" to_op="Filter Stopwords (Dictionary) (2)" to_port="file"/>
          <connect from_op="Filter Tokens (by Length) (2)" from_port="document" to_op="Filter Stopwords (Dictionary) (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (Dictionary) (2)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="operator_toolbox:lda_exampleset" compatibility="2.2.000" expanded="true" height="124" name="Extract Topics from Data (LDA)" width="90" x="514" y="85">
        <parameter key="text_attribute" value="text"/>
        <parameter key="number_of_topics" value="10"/>
        <parameter key="use_alpha_heuristics" value="true"/>
        <parameter key="alpha_sum" value="0.1"/>
        <parameter key="use_beta_heuristics" value="true"/>
        <parameter key="beta" value="0.01"/>
        <parameter key="optimize_hyperparameters" value="true"/>
        <parameter key="optimize_interval_for_hyperparameters" value="10"/>
        <parameter key="top_words_per_topic" value="5"/>
        <parameter key="iterations" value="1000"/>
        <parameter key="reproducible" value="false"/>
        <parameter key="enable_logging" value="false"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <connect from_op="Retrieve Practice LDA" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Extract Topics from Data (LDA)" to_port="exa"/>
      <connect from_op="Extract Topics from Data (LDA)" from_port="exa" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>