Hi,
I'd like to use the Information Extraction Plugin 1.0.2 for RapidMiner, but I have a problem.
I'd like to create the parse tree with the TreeCreatorAndPreprocessor operator and visualize it with the ParseTreeVisualizer operator.
I'm using only one sentence, stored in a text-type attribute (the attribute name is textattribute in my workflow).
I have two workflows:
1; if the tree is already given by its string representation like Stanford Parser: (ROOT (S (NP (NNP Felix)) (VP (VBD went) (PP (TO to) (NP (NNP New) (NNP York))) (S (VP (TO to) (VP (VB visit) (NP (NP (DT the) (NN statue)) (PP (IN of ) (NP (NN liberty)))))))) (. .)))
2; if a sentence (Felix went to New York to visit the statue of liberty.) is contained in the attribute selected by the parameter valueAttribute
The TreeCreatorAndPreprocessor operator generates an empty (?) structID, the object attribute that is used to store the parse tree cannot be created, and the ParseTreeVisualizer prints nothing. In case 1, the "needParsing" option does not need to be selected, because my sentence is already parsed.
In case 2, the "needParsing" option needs to be selected, because I have only a plain sentence without parsing. But where can I download the model file if I want to create the parse tree? I found the Stanford Parser model file englishPCFG.ser (I attached it in the link); is this the correct model file? If it is not, where can I download the correct one?
Could you help me with an answer, correct my workflow so that it can create and visualize the parse tree, or send me a sample process in which the TreeCreatorAndPreprocessor operator works?
I tried several versions of RapidMiner (5.0, 5.1, 5.2 and the newest one too).
I attached my two RapidMiner workflow files together with my two input text files, the downloaded model file, and the plugin.
http://dobi.web.elte.hu/rapidminer_workflow.rar
Thank you for your help in advance,
Best regards
Hadobás András
First workflow XML file:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  First workflow.
  Connected chain (see the connect elements at the end of the inner process):
    Read Document, then Documents to Data, then TreeCreatorAndPreprocessor, then result 1.
  The needParsing parameter is not set on TreeCreatorAndPreprocessor in this workflow.
-->
<process version="5.2.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
    <process expanded="true" height="641" width="567">
      <!-- Source: the text file named by the "file" parameter. -->
      <operator activated="true" class="text:read_document" compatibility="5.2.003" expanded="true" height="60" name="Read Document" width="90" x="45" y="30">
        <parameter key="file" value="D:\rapidminer_workflow\first workflow StructueredTreeString.txt"/>
      </operator>
      <!-- text_attribute is set to "textattribute", the same name the later operators reference. -->
      <operator activated="true" class="text:documents_to_data" compatibility="5.2.003" expanded="true" height="76" name="Documents to Data" width="90" x="179" y="30">
        <parameter key="text_attribute" value="textattribute"/>
      </operator>
      <!--
        The next four operators carry activated="false" and are not referenced by
        any connect element below; this includes ParseTreeVisualizer.
      -->
      <operator activated="false" class="informationExtraction:sentence_tokenizer" compatibility="1.0.000" expanded="true" height="76" name="SentenceTokenizer" width="90" x="179" y="120">
        <parameter key="attribute" value="textattribute"/>
        <parameter key="new token-name" value="sentenceattribute"/>
      </operator>
      <operator activated="false" class="text_to_nominal" compatibility="5.2.006" expanded="true" height="76" name="Text to Nominal" width="90" x="313" y="120">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="textattribute"/>
      </operator>
      <operator activated="false" class="informationExtraction:parsetree_visualizer" compatibility="1.0.000" expanded="true" height="76" name="ParseTreeVisualizer" width="90" x="447" y="210"/>
      <operator activated="false" class="informationExtraction:html_tree_preprocessing" compatibility="1.0.000" expanded="true" height="60" name="HTMLTreePreprocessing" width="90" x="447" y="120">
        <parameter key="valueAttribute" value="textattribute"/>
        <parameter key="needParsing" value="true"/>
      </operator>
      <!-- Active tree operator: works on "textattribute"; a modelfile path is configured, poslist is empty. -->
      <operator activated="true" class="informationExtraction:parsetree_preprocessing_new" compatibility="1.0.000" expanded="true" height="60" name="TreeCreatorAndPreprocessor" width="90" x="447" y="30">
        <parameter key="valueAttribute" value="textattribute"/>
        <parameter key="modelfile" value="D:\rapidminer_workflow\modelfile englishPCFG.ser"/>
        <list key="poslist"/>
      </operator>
      <!-- Wiring of the active chain; only "result 1" of the process receives output. -->
      <connect from_op="Read Document" from_port="output" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_op="TreeCreatorAndPreprocessor" to_port="example set input"/>
      <connect from_op="TreeCreatorAndPreprocessor" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Second workflow XML file:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Second workflow.
  Connected chain (see the connect elements at the end of the inner process):
    Read Document, then Documents to Data, then TreeCreatorAndPreprocessor, then result 1.
  Here TreeCreatorAndPreprocessor has needParsing="true" in addition to the
  modelfile parameter.
-->
<process version="5.2.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
    <process expanded="true" height="641" width="567">
      <!-- Source: the text file named by the "file" parameter. -->
      <operator activated="true" class="text:read_document" compatibility="5.2.003" expanded="true" height="60" name="Read Document" width="90" x="45" y="30">
        <parameter key="file" value="D:\rapidminer_workflow\second workflow UnStructuredString.txt"/>
      </operator>
      <!-- text_attribute is set to "textattribute", the same name the later operators reference. -->
      <operator activated="true" class="text:documents_to_data" compatibility="5.2.003" expanded="true" height="76" name="Documents to Data" width="90" x="179" y="30">
        <parameter key="text_attribute" value="textattribute"/>
      </operator>
      <!--
        The next four operators carry activated="false" and are not referenced by
        any connect element below; this includes ParseTreeVisualizer.
      -->
      <operator activated="false" class="informationExtraction:sentence_tokenizer" compatibility="1.0.000" expanded="true" height="76" name="SentenceTokenizer" width="90" x="179" y="120">
        <parameter key="attribute" value="textattribute"/>
        <parameter key="new token-name" value="sentenceattribute"/>
      </operator>
      <operator activated="false" class="text_to_nominal" compatibility="5.2.006" expanded="true" height="76" name="Text to Nominal" width="90" x="313" y="120">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="textattribute"/>
      </operator>
      <operator activated="false" class="informationExtraction:parsetree_visualizer" compatibility="1.0.000" expanded="true" height="76" name="ParseTreeVisualizer" width="90" x="447" y="210"/>
      <operator activated="false" class="informationExtraction:html_tree_preprocessing" compatibility="1.0.000" expanded="true" height="60" name="HTMLTreePreprocessing" width="90" x="447" y="120">
        <parameter key="valueAttribute" value="textattribute"/>
        <parameter key="needParsing" value="true"/>
      </operator>
      <!-- Active tree operator: works on "textattribute" with needParsing enabled; poslist is empty. -->
      <operator activated="true" class="informationExtraction:parsetree_preprocessing_new" compatibility="1.0.000" expanded="true" height="60" name="TreeCreatorAndPreprocessor" width="90" x="447" y="30">
        <parameter key="valueAttribute" value="textattribute"/>
        <parameter key="needParsing" value="true"/>
        <parameter key="modelfile" value="D:\rapidminer_workflow\modelfile englishPCFG.ser"/>
        <list key="poslist"/>
      </operator>
      <!-- Wiring of the active chain; only "result 1" of the process receives output. -->
      <connect from_op="Read Document" from_port="output" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_op="TreeCreatorAndPreprocessor" to_port="example set input"/>
      <connect from_op="TreeCreatorAndPreprocessor" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>